*=============================================================================
* Data preparation: builds est_ready.dta (prices merged with NSFIE intensity measures)
cap program drop pre_est_ready
program define pre_est_ready
	* Purpose: build est_ready.dta, the price panel merged with purchase-channel
	* intensity measures computed from the NSFIE data.
	* Steps: (1) list FIES codes that have an NSFIE match, (2) compute expenditure
	* shares and intensities by purchase channel and NSFIE year, (3) reshape the
	* price data to long form, drop flagged outliers and merge the intensities,
	* (4) label variables and save. Takes no arguments.
	* Writes est_ready.dta under the Data global; the three intermediate files
	* are erased at the end.

	* Limited to the restricted sample, i.e. FIES matches with NSFIE
	import excel using "$Data/Kaisetsu.xlsx", sheet("tradable_classification_NSyr") clear first
	keep FIESCode FIEStoNS* Tradable
	* rows without a Tradable entry are excluded from the sample
	qui drop if mi(Tradable)
	qui duplicates drop
	save "$Data/FIES_name.dta", replace

	* Match list: FIES codes with non-missing expenditure and at least one
	* NSFIE code, in long form with one row per FIESCode and NSFIE year
	use "$Data/RakutenProject.dta", clear
	keep FIES expenditure
	drop if mi(expenditure)
	duplicates drop
	rename FIES FIESCode
	merge 1:1 FIESCode using "$Data/FIES_name.dta", keep(3) nogen
	keep if !mi(FIEStoNS1999)|!mi(FIEStoNS2004)|!mi(FIEStoNS2009)|!mi(FIEStoNS2014)
	keep FIESCode FIEStoNS1999 FIEStoNS2004 FIEStoNS2009 FIEStoNS2014
	duplicates drop
	reshape long FIEStoNS, i(FIESCode) j(year)
	drop if mi(FIEStoNS)
	save "$Data/matchlist.dta", replace

	* Calculate internet intensity and share of expenditures
	use "$Data/NSFIE_totalhh_national.dta", clear
	keep if inlist(type_purchase,"Internet","All","Catalog","Department","Discount","Supermarket")
	* Each Department and Discount row is duplicated; the first copy in each
	* group is relabelled Department+Discount, and every row still labelled
	* Department, Discount or Supermarket then becomes Department+Discount+Supermarket
	expand 2 if inlist(type_purchase,"Department","Discount")	/*group together Department+Discount and Department+Discount+Supermarket*/
	bys year product_code type_purchase: replace type_purchase = "Department+Discount" if inlist(type_purchase,"Department","Discount") & _n == 1
	replace type_purchase = "Department+Discount+Supermarket" if inlist(type_purchase,"Department","Discount","Supermarket")
	* Sum spending within each channel, product and year; the total is set to
	* missing when no non-missing avg_exp contributes to it
	egen total_exp = total(avg_exp), by(type_purchase product_code year)
	egen count_obs = count(avg_exp), by(type_purchase product_code year)
	qui replace total_exp = . if count_obs == 0
	keep type_purchase product_code description year total_exp
	duplicates drop
	rename total_exp avg_exp
	qui rename product_code FIEStoNS
	* joinby forms all pairwise matches, so an NSFIE code shared by several
	* FIES codes appears once per FIES code
	qui joinby FIEStoNS year using "$Data/matchlist.dta"
	
	* Calculate expenditure share by type of purchase
	* sum is filled on one row per NSFIE code, year and channel so that the
	* yearly totals below count each NSFIE code once
	qui bys FIEStoNS year type_purchase: gen sum = avg_exp if _n == 1
	
	qui egen total_catalog = total(sum) if type_purchase == "Catalog", by(year)
	* Internet totals are not computed for 1999
	qui egen total_internet = total(sum) if type_purchase == "Internet" & year ~= 1999, by(year)
	qui egen total_dd = total(sum) if type_purchase == "Department+Discount", by(year)
	qui egen total_dds = total(sum) if type_purchase == "Department+Discount+Supermarket", by(year)
	qui egen total_exp = total(sum) if type_purchase == "All", by(year)
	
	qui gen share_catalog = avg_exp/total_catalog if type_purchase == "Catalog"
	qui gen share_internet = avg_exp/total_internet if type_purchase == "Internet"
	qui gen share_dd = avg_exp/total_dd if type_purchase == "Department+Discount"
	qui gen share_dds = avg_exp/total_dds if type_purchase == "Department+Discount+Supermarket"
	qui gen share_exp = avg_exp/total_exp if type_purchase == "All"
	
	
	keep FIEStoNS year share_* description FIESCode
	
	* Each share is defined only on rows of its own channel; sort descending
	* within NSFIE code and year and carry the value forward from the previous
	* row so that rows with a missing share inherit it
	foreach var in catalog internet exp dd dds {
		gsort FIEStoNS year -share_`var'
		qui by FIEStoNS year: replace share_`var' = share_`var'[_n-1] if missing(share_`var') & _n > 1
	}
	
	* Calculate normalized intensity
	* For each channel: raw intensity = channel share over total expenditure share
	* (kept under the y-prefixed name); the x-prefixed version is divided by its
	* within-year maximum and set to zero where the channel share is zero
	foreach var in x xcat xdd xdds { 
		if "`var'" == "x" {
			local name internet
			local orig y
		}
		if "`var'" == "xcat" {
			local name catalog
			local orig ycat
		}
		if "`var'" == "xdd" {
			local name dd
			local orig ydd
		}
		if "`var'" == "xdds" {
			local name dds
			local orig ydds
		}
		
		qui gen `var'_NS = share_`name'/share_exp
		qui gen `orig'_NS = `var'_NS
		qui egen `var'_NS_max = max(`var'_NS), by(year)
		qui replace `var'_NS = `var'_NS/`var'_NS_max
		qui replace `var'_NS = 0 if share_`name' == 0
		* Build one variable per NSFIE year and copy its value to every row of
		* the same FIESCode (descending sort, then carry forward)
		foreach year in 1999 2004 2009 2014 {

			qui gen `var'_NS`year' = `var'_NS if year == `year'
			gsort FIESCode -`var'_NS`year'
			by FIESCode: replace `var'_NS`year' = `var'_NS`year'[_n-1] if missing(`var'_NS`year')
			
			qui gen `orig'_NS`year' = `orig'_NS if year == `year'
			gsort FIESCode -`orig'_NS`year'
			by FIESCode: replace `orig'_NS`year' = `orig'_NS`year'[_n-1] if missing(`orig'_NS`year')
		}
	}
	
	* Keep one row per FIESCode with the year-specific intensities; the
	* unnormalized department-store variables (ydd, ydds) and the 1999 internet
	* variables are not kept
	keep FIESCode x_NS2* xcat_NS1* xcat_NS2* xdd_NS1* xdd_NS2* xdds_NS1* xdds_NS2* y_NS2* ycat_NS1* ycat_NS2*
	duplicates drop
	save "$Data/NS_int.dta", replace
	
	* Merge NS internet intensity into main dataset with price variable
	use "$Data/RakutenProject.dta", clear
	keep p1* p2* ptilde* out33_* outSpec_* cityid_RPS RPS description expenditure TN_* FIES 
	qui rename FIES FIESCode
	* joinby keeps only FIES codes present in the match list
	qui joinby FIESCode using "$Data/matchlist.dta"
	drop year FIEStoNS
	duplicates drop
	* te: expenditure of the FIES code over the sum across all FIES codes,
	* counting each FIES code once
	foreach var in  expenditure {
		bys FIESCode: gen `var'_sum = `var' if _n == 1  /* avoid double counting */
		egen temp = sum(`var'_sum)
		gen `var'_tilde = `var'/temp
		drop `var'_sum temp
	}
	*gen x_ratio1_l = gms_l/expenditure *proprietary data*
	*qui sum x_ratio1_l *proprietary data*
	*qui replace x_ratio1_l = x_ratio1_l/`r(max)' *proprietary data*
	*gen y_rakuten = gms_l/expenditure *proprietary data*
	*rename (x_ratio1_l expenditure_tilde) (x_rakuten te) *proprietary data*
	rename expenditure_tilde te
	
	* Reshape yearly
	* NOTE(review): sreshape is not a built-in Stata command; presumably a
	* community-contributed replacement for reshape - confirm it is installed
	sreshape long p ptilde_ out33_ outSpec_ , i(cityid_RPS RPS) j(year)
	rename (ptilde_ out33_ outSpec_) (ptilde out33 outSpec) 
	
	* Drop outliers
	* only observations with both flags equal to zero are kept
	foreach v in out33 outSpec {
		qui keep if `v'==0
	}
	
	* Merge intensity variables from NSFIE
	* assert(2 3) makes the merge fail if any price observation has no match
	merge m:1 FIESCode using "$Data/NS_int.dta", assert(2 3) keep(3) nogen
	
	pwcorr  x_NS* xcat*, sig
	
	* Label variables

	label var xcat_NS1999 "catalog intensity from NSFIE in 1999"
	label var ycat_NS1999 "(unnormalized) catalog intensity from NSFIE in 1999"
	label var xdd_NS1999 "deparment+discount intensity from NSFIE in 1999"
	label var xdds_NS1999 "deparment+discount+supermarket intensity from NSFIE in 1999"
	
	foreach var in x xcat xdd xdds y ycat{
		if "`var'" == "x" {
			local name e-commerce
		}
		if "`var'" == "y" {
			local name (unnormalized) e-commerce
		}
		if "`var'" == "xcat" {
			local name catalog
		}
		if "`var'" == "ycat" {
			local name (unnormalized) catalog
		}
		if "`var'" == "xdd" {
			local name department+discount
		}
		if "`var'" == "xdds" {
			local name department+discount+supermarket
		}
		* cap: a variable that does not exist is skipped instead of stopping the program
		foreach year in 2004 2009 2014 {
			cap label var `var'_NS`year' "`name' intensity from NSFIE in `year'"
		}
	
	}
	
	save "$Data/est_ready.dta", replace
	
	* Summary statistics on price dispersion
	* These variables are created after the save above, so they are displayed
	* only and are not part of est_ready.dta
	gen l_ptilde = ln(ptilde)
	egen avg_l_ptilde = mean(l_ptilde), by(RPS year)
	gen l_ptilde_dispersion = l_ptilde - avg_l_ptilde
	sum l_ptilde_dispersion, d
	
	* Remove the intermediate files created above
	rm "$Data/FIES_name.dta" 
	rm "$Data/matchlist.dta" 
	rm "$Data/NS_int.dta"
end

*=============================================================================

*===============================================================================
*Program to plot figure 1
*===============================================================================
capture program drop plotfigure1
program define plotfigure1
	* Purpose: plot the official and adjusted METI B2C e-commerce market-size
	* series (figure 1) and save the METI variables used later as an
	* alternative dummy measurement. Takes no arguments.
	* Writes figure1.pdf under the figure global and METI_numbers.dta under
	* the Data global.
	import excel using "$Data/METI_numbers.xlsx", sheet("Sheet1") firstrow clear

	rename (Year BtoCECShare) (year2 METI_share)

	* log of the adjusted series, kept for the regressions
	generate METI_log_size = ln(AdjustedB2Cmarketsize/10)

	* plotted series are rescaled by 10000 and labelled as trillion yen
	* NOTE(review): the source column name suggests units of 100 million yen - confirm
	generate METI_size = BtoCECMarketSize100million/10000
	generate aMETI_size = AdjustedB2Cmarketsize/10000

	twoway ///
		(scatter METI_size year2, c(l) m(i) lw(thick) lc(black) lp(dash) ytitle("Trillion yen") ///
			ylabel(, format(%9.0gc) angle(0)) yaxis(1)  graphregion(color(white))) ///
		(scatter aMETI_size year2, c(l) m(i) lw(thick) lc(black) lp(solid) ytitle("Trillion yen")) ///
		,legend(order(1 "Official Series" 2 "Adjusted Series"))
	graph export "$figure/figure1.pdf", replace

	* only the year key and the two METI measures are saved
	keep year2 METI_log_size METI_share
	save "$Data/METI_numbers", replace
end	

*=============================================================================
*Program to construct the data used by Figures 2 and 3
*===============================================================================
capture program drop data_fig2_3
program define data_fig2_3
* Purpose: build the item-level and category-level inflation data used by the
* pre-trend figures. Takes no arguments.
* Outputs, all under the Data global:
*   item_cpi.dta              item-level CPI, inflation rates and CPI weight
*   pretrend_pi.dta           item-level inflation merged with intensity measures
*   pretrend_category_pi.dta  category-level price indices, one column set per
*                             category rank, normalized to 1 in 1997
* Requires est_ready.dta, i.e. pre_est_ready must have been run first.
* NOTE(review): the raw CPI workbook below is read from a hard-coded absolute
* path, unlike every other input, which lives under the Data global - confirm
* the location before running on another machine.

import excel "C:\Dropbox\Rakuten\Estimation\1.Raw\zni2015a.xlsx", sheet("Sheet1") clear

* Every column except A holds one item; the item code is read from row 4 and
* used to name the column
qui ds A, not
local allvar `r(varlist)'
qui destring `allvar', force replace
foreach var in `allvar' {
	local code=`var'[4]
	qui rename `var' cpi`code'
}
qui rename A year
qui destring year, force replace
* Row 7 is kept under the pseudo-year 0; it is split off below as the CPI weight.
* Rows whose year is still missing after destring are dropped
replace year = 0 if _n == 7
drop if missing(year)

qui reshape long cpi, i(year) j(RPS)
sort RPS year
xtset RPS year
gen pi = ln(cpi/L.cpi)*100
label var pi "Log-approx"
gen epi = (cpi/L.cpi-1)*100
label var epi "Exact inflation rate"

* Split the pseudo-year 0 rows into a weight file and attach the weight to
* every yearly observation of the same item
preserve
keep if year == 0
rename cpi cpi_weight
label var cpi_weight "CPI-Weight"
save "$Data/temp.dta", replace
restore
drop if year == 0
joinby RPS using "$Data/temp.dta"
save "$Data/item_cpi.dta", replace
rm "$Data/temp.dta"

********************************************************************************
*item level expenditure*
use "$Data/est_ready.dta", clear
keep  RPS x* te TN_DW
gduplicates drop

* Merge in broad category of goods
	preserve
		import excel using "$Data/Kaisetsu.xlsx", sheet("tradable_classification_NSyr") clear first
		keep FIESCode Table1Category
		qui drop if mi(Table1Category)
		qui duplicates drop
		save "$Data/tmp.dta", replace
		use "$Data/RakutenProject.dta", clear
		keep RPS FIES
		duplicates drop
		rename FIES FIESCode
		joinby FIESCode using "$Data/tmp.dta"
		keep RPS Table1Category
		duplicates drop
		save "$Data/tmp.dta", replace
	restore
	merge 1:1 RPS using "$Data/tmp.dta", keep(3) nogen
	rm "$Data/tmp.dta"
	

joinby RPS using "$Data/item_cpi.dta"
* observations with TN_DW equal to 1 are excluded
drop if TN_DW == 1

sort RPS year

save "$Data/pretrend_pi.dta", replace

* construct category level price indices
sort RPS year
	
*panel
keep if year >1981
* The count below is computed but the balanced-panel restriction that would
* use it is commented out, so no observations are dropped here
bys RPS: egen temp = count(pi)
qui sum temp
*keep if temp == `r(max)'
drop temp

*rank category based on category-level average e-commerce intensity

gen m2004 = .
gen m2009 = .
gen m2014 = .

* catgrtemp is a temporary numeric category id used only to loop over the
* categories; the final rank-based id catgr is created further down.
* The loop conditions refer to catgrtemp by its full name: the previous
* spelling catgr only resolved through variable-name abbreviation and would
* fail with varabbrev switched off.
egen catgrtemp = group(Table1Category)
sum catgrtemp
forval i = 1/`r(max)' {

	foreach yr in 2004 2009 2014 {
		sum x_NS`yr' [aw=cpi_weight] if catgrtemp == `i'
		replace m`yr' = `r(mean)' if catgrtemp == `i'
	}

}
gen meanx = (m2004+m2009+m2014)/3
*gen meanx = m2009
drop catgrtemp

* catgr is the rank of the category by its mean intensity; the rank is computed
* on one row per category and then copied to all rows of that category
sort Table1Category year
bys Table1Category: gen temp =meanx if _n == 1
egen rk = rank(temp)
bys Table1Category: egen catgr = total(rk)


collapse (mean) pi epi [aw=cpi_weight], by(year catgr Table1Category)
drop if missing(catgr)

reshape wide pi epi Table1Category, i(year) j(catgr)


drop if missing(year)
*add observation base year
set obs `=_N+1'
qui sum year
local b `r(min)'
local c `r(max)'
replace year = `b' -1 if missing(year)
tsset year

* Price levels start at 1 wherever the inflation rate is missing (this includes
* the added base year) and are chained forward from the previous row otherwise.
* NOTE(review): the loops assume exactly 20 category ranks - confirm against
* the number of categories in the classification sheet.
foreach var in P eP{
	forval j = 1/20 {
		qui gen `var'`j' = 1 if missing(pi`j')
	}
} 

sort year
forval j =1/20 {
	replace P`j' = exp(pi`j'/100)*P`j'[_n-1] if missing(P`j')
	replace eP`j' = (1+epi`j'/100)*eP`j'[_n-1] if missing(eP`j')
}


* Normalize each index to 1 in 1997
foreach var in P eP{
	forval j = 1/20 {
	gen temp`var'`j' = `var'`j' if year == 1997
	egen temp2`var'`j' = total(temp`var'`j')
	gen n`var'`j' = `var'`j'/temp2`var'`j'
	}
}

*check*
* The inflation rates implied by the normalized indices must reproduce the
* original rates up to rounding; levels are then blanked where inflation is missing
forval j = 1/20 {
	gen temp_pi`j' =ln(nP`j'/L.nP`j')*100
	gen temp_epi`j' = (neP`j'/L.neP`j'-1)*100
	foreach var in pi epi {
		gen check_`var'`j' = `var'`j' - temp_`var'`j'
		replace check_`var'`j' = round(check_`var'`j', 0.0000001)
		qui count if !missing(check_`var'`j')
		if `r(N)'>0 {
		qui sum check_`var'`j' 
		assert `r(mean)' == 0
		}
	}
	foreach var in P eP nP neP{
		replace `var'`j'=. if missing(pi`j')
	}
}
drop if year <1982

save "$Data/pretrend_category_pi.dta", replace

end


*===============================================================================
*** Program to plot Figure2
*===============================================================================
capture program drop _fig2_quartile_index
program define _fig2_quartile_index, rclass
* Private helper for plotfigure2.
* Argument: def - name of the intensity variable used to form quartiles.
* Loads pretrend_pi.dta, keeps the balanced panel from 1982 on, averages
* inflation (CPI-weighted) within the bottom and top quartile of def, chains
* the averages into price levels and normalizes them to 1 in 1997.
* Leaves in memory one row per year with P, eP, nP and neP for the bottom and
* top quartile.
* Returns r(b) and r(c): first and last year of the inflation data, used by the
* caller for the x-axis range.
args def

use "$Data/pretrend_pi.dta", clear

*panel
keep if year > 1981
*drop if year >2018
* balanced panel: keep items with the maximum number of inflation observations
bys RPS: egen temp = count(pi)
qui sum temp
keep if temp == `r(max)'
drop temp

sort RPS year
bys RPS: gen first = 1 if _n == 1

* quartile cut-offs are computed on one row per item
sum `def' if first == 1, d
gen bot_quartile = 1 if `def' < `r(p25)'
gen top_quartile = 1 if `def' > `r(p75)' & `def' ~= .

gen qind = 1 if bot_quartile == 1
replace qind =2 if top_quartile == 1

collapse (mean) pi epi [aw=cpi_weight], by(year qind)

* the label is now defined under the same name it is attached with
* (it was previously defined as qindcator and attached as qindicator)
label define qindicator 1 "Bottom Q" 2 "Top Q", modify
label values qind qindicator
drop if missing(qind)

reshape wide pi epi, i(year) j(qind)

foreach var in pi epi {
	rename `var'1 `var'_bot
	rename `var'2 `var'_top
	label var `var'_bot "`var' bottom quartile"
	label var `var'_top "`var' top quartile"
}
drop if missing(year)

*add observation base year
set obs `=_N+1'
qui sum year
local b `r(min)'
local c `r(max)'
replace year = `b' -1 if missing(year)
tsset year

foreach var in P eP{
	gen `var'_bot = 1 if year == `b' -1
	gen `var'_top = 1 if year == `b' -1
} 
sort year

* chain the log-approximation index
replace P_bot = exp(pi_bot/100)*P_bot[_n-1] if missing(P_bot)
replace P_top = exp(pi_top/100)*P_top[_n-1] if missing(P_top)

* chain the exact-inflation index off its own lag; it was previously chained
* off the lagged log-approximation index, unlike the category-level
* construction in data_fig2_3
replace eP_bot = (1+epi_bot/100)*eP_bot[_n-1] if missing(eP_bot)
replace eP_top = (1+epi_top/100)*eP_top[_n-1] if missing(eP_top)

* normalize every index to 1 in 1997
foreach var in P_bot P_top eP_bot eP_top {
	gen temp`var' = `var' if year == 1997
	egen temp2`var' = total(temp`var')
	gen n`var' = `var'/temp2`var'
}

*check*log approximation of inflation*
* normalization must leave the implied inflation rates unchanged
foreach var in P_bot P_top nP_bot nP_top {
	gen temppi_`var' =ln(`var'/L.`var')*100
}
gen check = temppi_P_bot - temppi_nP_bot
replace check = round(check,0.0001)
qui sum check
assert `r(mean)' == 0

*check*exact inflation rate*
foreach var in eP_bot eP_top neP_bot neP_top {
	gen temppi_`var' =(`var'/L.`var'-1)*100
}
gen checkep = temppi_eP_top - temppi_neP_top
replace checkep = round(checkep,0.0001)
qui sum checkep
assert `r(mean)' == 0
drop temp* check*

* drop the added base-year row
qui sum year
drop if year == `r(min)'

return local b `b'
return local c `c'
end

capture program drop plotfigure2
program define plotfigure2 
* Purpose: plot figure 2. Takes no arguments.
* Left panel: price levels of the bottom and top quartile of products by
* e-commerce intensity in 2009, plus the category-level series nP20 on a
* second axis.
* Right panel: the same quartile comparison by catalog intensity in 1999.
* Requires pretrend_pi.dta and pretrend_category_pi.dta from data_fig2_3.
* Writes figure2_left.pdf and figure2_right.pdf under the figure global.

** e-commerce intensity
_fig2_quartile_index x_NS2009
* read the returned year range before any other command replaces r()
local b `r(b)'
local c `r(c)'

merge 1:1 year using "$Data/pretrend_category_pi.dta"

label var nP20 "Electronics Price Level (1997 = 1)"
twoway (scatter neP_bot year, lcolor(black) lwidth(thick) c(l) m(i)) ///
(scatter neP_top year, lcolor(black) lwidth(thick) lp(dash) c(l) m(i)) ///
(scatter nP20 year, lcolor(black) lwidth(thick) lp(dash_dot) c(l) m(i) yaxis(2)), ///
ti("Products Grouped by E-Commerce Intensity") ///
xtitle("Year", size(medium)) graphregion(color(white)) ylabel(, angle(0) format(%9.1f)) ///
ylabel(, angle(0) axis(2) format(%9.1f)) ytitle("Price Level (1997 = 1)") ///
legend(lab(1 "Bottom-quartile products (left axis)") lab(2 "Top-quartile products (left axis)") ///
lab(3 "Electronics (right axis)") order(1 2 3) size(small) bmargin(none) keygap(0.4) symxsize(11)) ///
xline(1997, lcolor(black)) xsc(r(`b' `c')) xlabel(1982 1987 1992 1997 2002 2007 2012 2017) 
graph export "$figure/figure2_left.pdf", replace

** catalog intensity
_fig2_quartile_index xcat_NS1999
local b `r(b)'
local c `r(c)'

twoway (scatter neP_bot year, lcolor(black) lwidth(thick) c(l) m(i)) ///
(scatter neP_top year, lcolor(black) lwidth(thick) lp(dash) c(l) m(i)), ///
xtitle("Year", size(medium)) graphregion(color(white)) ///
 ylabel(, angle(0) format(%9.1f)) ytitle("Price Level (1997 = 1)") ///
ti("Products Grouped by Catalog Sales Intensity") ///
legend(lab(1 "Bottom-quartile products") lab(2 "Top-quartile products") order(1 2) size(small)) ///
xline(1997, lcolor(black)) xsc(r(`b' `c')) xlabel(1982 1987 1992 1997 2002 2007 2012 2017) 
graph export "$figure/figure2_right.pdf", replace

end


*===============================================================================
*** Program to plot Figure 3
*===============================================================================
capture program drop plotfigure3
program define plotfigure3
* Purpose: plot figure 3, binned scatters of average item-level inflation
* against e-commerce intensity in 2014 (left) and catalog intensity in 1999
* (right). Takes no arguments.
* Requires pretrend_pi.dta from data_fig2_3 and the binscatter command.
* Writes figure3_left.pdf and figure3_right.pdf under the figure global.

use "$Data/pretrend_pi.dta", clear	

* sample window
local year1 1997
local year2 2020
keep if inrange(year, `year1', `year2')

* balanced panel: keep items observed in the maximum number of years
bysort RPS: egen n_pi = count(pi)
summarize n_pi, meanonly
keep if n_pi == `r(max)'
drop n_pi

* one row per item: unweighted mean inflation and the intensity measures
collapse (mean) avgpi=pi avgepi =epi x_NS* xcat_NS*, by(RPS)

binscatter avgpi x_NS2014, mc(black) lc(black) ///
	ytitle("Average Inflation Rate (%)") xtitle("E-Commerce Intensity in 2014") ///
	graphregion(color(white))  ylabel(, angle(0)) xlabel(,format(%9.2f))
graph export "$figure/figure3_left.pdf", replace

binscatter avgpi xcat_NS1999, mc(black) lc(black) ///
	ytitle("Average Inflation Rate (%) ") xtitle("Catalog Sales Intensity in 1999") ///
	graphregion(color(white))  ylabel(, angle(0)) xlabel(,format(%9.2f))
graph export "$figure/figure3_right.pdf", replace

end

*==============================================================================	
* Prepare datasets for regression
*===============================================================================
cap program drop est_ready
program define est_ready
* Purpose: build the regression dataset for one pair of years.
* Arguments:
*   y1 - initial year of the price change
*   y2 - final year of the price change
* Loads est_ready.dta, collapses to one row per product and city with the log
* price in each of the two years, computes the log price change and its
* interactions with the intensity measures, merges the METI series on y2 and
* saves the result as y1_y2_reg.dta in the pricereg folder under the Data global.
* Side effect: the working directory is changed to that folder.
args y1 y2
	
	use "$Data/est_ready.dta", clear
	
	* prepare log prices
	qui gen avgp_prior=ptilde if year == `y1'
	qui gen avgp_post=ptilde if year == `y2' 
	
	qui gen avglp1 = log(avgp_prior)
	qui gen avglp2 = log(avgp_post)
	
	qui label var avglp1 "quality adjusted log price in `y1'"
	qui label var avglp2 "quality adjusted log price in `y2'"

	* save variable labels
	* collapse does not carry the labels over, so they are stored in locals
	* (falling back to the variable name) and restored afterwards
	foreach v of var avglp1 avglp2  te TN_DW x_NS* xcat_NS* xdd_NS* xdds_NS* cityid_RPS RPS{
		local l`v' : variable label `v'
		if `"`l`v''"' == "" {
			local l`v' "`v'"
		}
	}
	
	* collapse to have unique values by RPS and cityid_RPS
	collapse (mean) avglp1 avglp2 te TN_DW x_NS* xcat_NS* xdd_NS* xdds_NS*, by(RPS cityid_RPS)
	
	*label variables
	foreach v of var * {
		label var `v' "`l`v''"
	}
  
	gen year1 = `y1'
	gen year2 = `y2'
	* x is the length of the period, used only in the variable labels below
	local x = `y2'-`y1'

	
	* generate price change
	qui gen delta_avgp = avglp2 - avglp1
	drop if missing(delta_avgp)

	* discard incomplete observations
	qui keep if !missing(avglp1) & !missing(avglp2)
			
	* e-commerce intensity times price in the previous period
	*qui gen xlp1_rakuten = x_rakuten*avglp1 /* Proprietary data Rakuten*/
	*label var xlp1_rakuten "\$ x(Rakuten) \times \Delta p_{ict-`x'}$"  /* Proprietary data Rakuten*/
	foreach year in 2004 2009 2014 {
		qui gen xlp1_NS`year' = x_NS`year'*avglp1 
		label var xlp1_NS`year' "\$ x(NS`year') \times \Delta p_{ict-`x'}$"
	}
	
	* catalog intensity times price in the previous period
	foreach year in 1999 2004 2009 2014 {
		qui gen xcatlp1_NS`year' = xcat_NS`year'*avglp1 
		label var xcatlp1_NS`year' "\$ xcat(NS`year') \times \Delta p_{ict-`x'}$"
	}
	
	* merge METI numbers
	* keep(1 3): years without a METI entry are kept with missing METI values
	merge m:1 year2 using "$Data/METI_numbers", keep(1 3) nogen
	
	* label variables
	label var delta_avgp  "\$\Delta p_{ict}$"
	label var avglp1	"\$ p_{ic`y1'} $"
	* this line previously read local var te, which only defined an unused
	* local macro and left te without its intended label
	label var te 		"\$\tilde{e}$"
	
	* save datasets
	* NOTE(review): cd changes the working directory for the rest of the
	* session; if the Data global holds a relative path, later references to it
	* will resolve against the new directory - confirm this is intended
	qui cap mkdir "$Data/pricereg"
	cd "$Data/pricereg"
	save "`y1'_`y2'_reg.dta", replace

end 

*=============================================================================
*** Program to plot Figure 4
*===============================================================================
capture program drop plotfigure4
program define plotfigure4
	* Purpose: plot figure 4. Takes no arguments.
	* Runs reg_dind_scatter (defined elsewhere) for 1991-1996 and 1996-2001
	* with x_NS2009, pairs the resulting graphs by type, stacks the three
	* pairs in rows and exports figure4.pdf under the figure global.
	* NOTE(review): assumes reg_dind_scatter leaves graphs in memory named
	* scatter_adj_p, scatter_adj_p_smallx and scatter_adj_p_bigx followed by
	* the two years - confirm against its definition.

	foreach span in "1991 1996" "1996 2001" {
		reg_dind_scatter `span' x_NS2009
	}

	* one combined row per graph type, named scatter_adj_p1 to scatter_adj_p3
	local row = 0
	foreach kind in p p_smallx p_bigx {
		local ++row
		grc1leg scatter_adj_`kind'_1991_1996 scatter_adj_`kind'_1996_2001, ///
			iscale(*0.8) imargin(tiny) ycommon xcommon ///
			graphregion(margin(tiny) color(white)) plotregion(margin(tiny)) ///
			name(scatter_adj_p`row', replace)
	}

	* stack the three rows and export
	grc1leg scatter_adj_p1 scatter_adj_p2 scatter_adj_p3, ///
		row(3) iscale(*0.8) imargin(tiny) ///
		graphregion(margin(tiny) color(white)) plotregion(margin(tiny)) ///
		name(scatter_adj_p, replace)
	graph display scatter_adj_p, ysize(8)
	graph export "$figure/figure4.pdf", replace 
	graph close

end


*=============================================================================
*** Program to plot Figure A2
*===============================================================================
capture program drop plotfigure_a2
program define plotfigure_a2
	* Purpose: plot figure A2, the same layout as figure 4 but with x_rakuten
	* as the intensity measure. Takes no arguments.
	* Runs reg_dind_scatter (defined elsewhere) for 1991-1996 and 1996-2001,
	* pairs the resulting graphs by type, stacks the three pairs in rows and
	* exports figure_a2.pdf under the figure global.
	* NOTE(review): the lines that would create x_rakuten and xlp1_rakuten are
	* commented out as proprietary elsewhere in this file, so this program
	* presumably needs the proprietary data to run - confirm.

	foreach span in "1991 1996" "1996 2001" {
		reg_dind_scatter `span' x_rakuten
	}

	* one combined row per graph type, named scatter_adj_p1 to scatter_adj_p3
	local row = 0
	foreach kind in p p_smallx p_bigx {
		local ++row
		grc1leg scatter_adj_`kind'_1991_1996 scatter_adj_`kind'_1996_2001, ///
			iscale(*0.8) imargin(tiny) ycommon xcommon ///
			graphregion(margin(tiny) color(white)) plotregion(margin(tiny)) ///
			name(scatter_adj_p`row', replace)
	}

	* stack the three rows and export
	grc1leg scatter_adj_p1 scatter_adj_p2 scatter_adj_p3, ///
		row(3) iscale(*0.8) imargin(tiny) ///
		graphregion(margin(tiny) color(white)) plotregion(margin(tiny)) ///
		name(scatter_adj_p, replace)
	graph display scatter_adj_p, ysize(8)
	graph export "$figure/figure_a2.pdf", replace 
	graph close

end

*===============================================================================
*** Program to produce Table 1: E-Commerce intensity of consumer expenditure
*===============================================================================
cap program drop table1
program define table1
	* Purpose: produce table 1, expenditure shares and channel intensities by
	* broad category (Table1Category), restricted to rows with TN_DW equal to 0.
	* Takes no arguments.
	* Writes table1.tex under the table global; the intermediate files
	* FIES_name.dta, matchlist.dta and tmp.tex are erased at the end.
	* Requires the sreshape and tabout commands.
	
	* Limited to the restricted sample, i.e. FIES matches with NSFIE
	import excel using "$Data/Kaisetsu.xlsx", sheet("tradable_classification_NSyr") clear first
	keep FIESCode FIEStoNS* Tradable Table1Category
	qui drop if mi(Tradable)
	qui duplicates drop
	save "$Data/FIES_name.dta", replace

	* Match list in long form: one row per FIESCode and NSFIE year, carrying
	* expenditure, TN_DW and the broad category
	use "$Data/RakutenProject.dta", clear
	keep FIES expenditure TN_DW
	drop if mi(expenditure)
	duplicates drop
	rename FIES FIESCode
	merge 1:1 FIESCode using "$Data/FIES_name.dta", keep(3) nogen
	keep if !mi(FIEStoNS1999)|!mi(FIEStoNS2004)|!mi(FIEStoNS2009)|!mi(FIEStoNS2014)
	keep FIESCode FIEStoNS1999 FIEStoNS2004 FIEStoNS2009 FIEStoNS2014 expenditure TN_DW Table1Category
	duplicates drop
	sreshape long FIEStoNS, i(FIESCode TN_DW expenditure Table1Category) j(year)
	drop if mi(FIEStoNS)
	save "$Data/matchlist.dta", replace

	* Calculate internet intensity and share of expenditures
	use "$Data/NSFIE_totalhh_national.dta", clear
	keep if inlist(type_purchase,"Internet","All","Catalog") 
	rename product_code FIEStoNS
	joinby FIEStoNS year using "$Data/matchlist.dta"
	
	* Keep only goods
	keep if TN_DW == 0
	
	* Define NSFIE expenditure share and sales share
	bys TN_DW FIEStoNS year type_purchase: gen sum = avg_exp if _n == 1 /*avoid double counting*/
	egen total_internet = total(sum) if type_purchase == "Internet", by(year TN_DW)
	egen total_exp = total(sum) if type_purchase == "All", by(year TN_DW)
	egen total_catalog = total(sum) if type_purchase == "Catalog", by(year TN_DW)
	
	gen share_catalog = avg_exp/total_catalog if type_purchase == "Catalog"
	gen share_internet = avg_exp/total_internet if type_purchase == "Internet"
	gen share_exp = avg_exp/total_exp if type_purchase == "All"
	
	keep FIEStoNS year type_purchase share_internet share_exp share_catalog description FIESCode Table1Category expenditure TN_DW
	duplicates drop
	
	* Define expenditure share and Rakuten sales share
	foreach var in  expenditure {
		bys TN_DW FIESCode: gen `var'_sum = `var' if _n == 1  /* avoid double counting */
		egen temp = total(`var'_sum), by(TN_DW)
		gen `var'_tilde = `var'/temp
		drop `var'_sum temp
	}
	qui rename expenditure_tilde te
	
	* Avoid duplicates
	* each share is kept on one row per group and blanked on the others so
	* that the sums taken below count it once
	bys TN_DW FIESCode (te): replace te = . if _n ~= 1
	bys TN_DW FIEStoNS year type_purchase: replace share_internet = . if _n ~= 1 & type_purchase == "Internet"
	bys TN_DW FIEStoNS year type_purchase: replace share_exp = . if _n ~= 1 & type_purchase == "All"
	bys TN_DW FIEStoNS year type_purchase: replace share_catalog = . if _n ~= 1 & type_purchase == "Catalog"
	
	* Check the share of expenditure sum to 1
	* the totals are only displayed; nothing is asserted
	foreach var in "share_catalog" "share_exp" "share_internet" {
		qui egen test = total(`var'), by(year TN_DW)
		if "`var'" == "share_internet" {
			sum test if year ~= 1999
		}
		else{
			sum test
		}
		drop test
	}
	foreach var in "te " {
		qui egen test = total(`var'), by(TN_DW)
		sum test
		drop test
	}
	
	* year-specific copies of the shares, so that the collapse below yields
	* one column per year
	foreach year in 2004 2009 2014 {
		qui gen share_internet_`year' = share_internet if year == `year'
	}
	
	foreach year in 1999 2004 2009 2014 {
		qui gen share_exp_`year' = share_exp if year == `year'
		qui gen share_catalog_`year' = share_catalog if year == `year'
	}

	
	* Table 1: E-Commerce intensity on goods
	* All values in percentage
		
		collapse (sum) share_exp_2009 share_internet_2009 share_exp_2004 share_internet_2004 share_exp_2014 share_internet_2014 share_catalog_1999 share_exp_1999, by(Table1Category TN_DW)
		ds Table1Category TN_DW, not
		foreach var in `r(varlist)' {
			qui replace `var' = `var'*100
		}
		
		* x defines as internet share over expenditure share, over the max of this ratio
		foreach year in 2004 2009 2014{
			egen max_x = max(share_internet_`year'/share_exp_`year')
			gen x_NS`year' = (share_internet_`year'/share_exp_`year')/max_x
			gen y_NS`year' = (share_internet_`year'/share_exp_`year')
			drop max_x
		}	
		* the catalog measure is built the same way, for 1999 only
		local year 1999
		
		egen max_xcat = max(share_catalog_`year'/share_exp_`year')
		gen xcat_NS`year' = (share_catalog_`year'/share_exp_`year')/max_xcat
		gen ycat_NS`year' = (share_catalog_`year'/share_exp_`year')
		drop max_xcat	
		
		/*Rakuten data is proprietary*/
		*egen max_rakuten = max(gms_l_tilde/te)
		*gen x_rakuten = (gms_l_tilde/te)/max_rakuten
		*gen y_rakuten = (gms_l_tilde/te)
		*drop max_rakuten
		
		* rows are ordered by 2009 expenditure share, largest first
		gsort -share_exp_2009	
				
		* Format for export
		* order is the row position; its value label carries the category
		* name so that tabout prints the name in that order
		gen order = _n
		gen name = "{    "+strtrim(Table1Category)+"}"
		sum order
		forvalues i = 1/`r(N)'{
			label define tabout_lbl `=order[`i']' "`=name[`i']'", modify
		}
		label values order tabout_lbl
		
		tabout order using  "$table/table1.tex", replace format(2c) bt font(bold) show(all) ///
		layout(rb) h1(nil) h2(nil) h3(nil) sum style(tex) oneway ///
		cells(sum share_exp_2009 sum share_internet_2009 mean x_NS2004 mean x_NS2009 mean x_NS2014 mean xcat_NS1999) ///
		ptotal(all) clab(_ _ _ _) ///
		topf("$table/table1_top.tex") botf("$table/table1_bot.tex")
		
		* relabel the total row, writing through a temporary file
		* NOTE(review): the second filefilter call has no whitespace between
		* its two quoted filenames - confirm it parses as two arguments
		filefilter "$table/table1.tex" "$table/tmp.tex", ///
		from( "\BStextbf{Total}" ) to("\BStextbf{Total/Mean}") replace
		filefilter "$table/tmp.tex""$table/table1.tex", replace
		
	* remove temporary files
	rm "$table/tmp.tex"	
	rm "$Data/FIES_name.dta"
	rm "$Data/matchlist.dta"
end

*===============================================================================
* Calculate summary statistics for tradable goods
*===============================================================================
capture program drop table2
program define table2
	* Purpose: produce table 2, summary statistics of the intensity measures
	* and of the one-year log price change, excluding observations with TN_DW
	* equal to 1. Takes no arguments.
	* Requires est_ready.dta and the yearly files 1991_1992 to 2015_2016 in
	* the pricereg folder, plus the gduplicates, estpost and esttab commands.
	* Writes table2.tex under the table global.

	* ---- intensity measures, one row per FIES code ----
	use "$Data/est_ready.dta", clear
	drop if TN_DW == 1
	keep RPS FIESCode xcat_NS1999 x_NS2009 x_NS2004 x_NS2014
	gduplicates drop

	*label var x_rakuten "\$ x_{i10}^{R} $"
	label var x_NS2004 "\$ x_{i04}^{E} $"
	label var x_NS2009 "\$ x_{i09}^{E} $"
	label var x_NS2014 "\$ x_{i14}^{E} $"
	label var xcat_NS1999 "\$ x_{i99}^{C} $"

	* the first variable creates the file, later ones are appended to it
	local mode "replace"
	foreach var in x_NS2004 x_NS2009 x_NS2014 xcat_NS1999 {
		gduplicates drop FIESCode, force

		est clear
		estpost tabstat `var', statistics(count mean sd min p10 p50 p90 max) columns(statistics) listwise 
		esttab using "$table/table2.tex", substitute(\_ _) nonumbers ///
			cell("count(fmt(%9.0fc)) mean(fmt(%9.3fc)) sd(fmt(%9.3fc)) min(fmt(%9.3fc)) p10(fmt(%9.3fc)) p50(fmt(%9.3fc)) p90(fmt(%9.3fc)) max(fmt(%9.3fc))") ///
			`mode' label noobs
		local mode "append"
	}

	* ---- one-year price changes, all year pairs stacked ----
	use "$Data/pricereg/1991_1992_reg.dta", clear
	forvalues start = 1992/2015 {
		local stop = `start' + 1
		quietly append using "$Data/pricereg/`start'_`stop'_reg.dta"
	}

	drop if TN_DW == 1
	label var delta_avgp  "\$\Delta p_{ict}$"

	estpost tabstat delta_avgp, statistics(count mean sd min p10 p50 p90 max) columns(statistics) listwise 
	esttab using "$table/table2.tex", substitute(\_ _) nonumbers ///
		cell("count(fmt(%9.0fc)) mean(fmt(%9.3fc)) sd(fmt(%9.3fc)) min(fmt(%9.3fc)) p10(fmt(%9.3fc)) p50(fmt(%9.3fc)) p90(fmt(%9.3fc)) max(fmt(%9.3fc))") ///
		append label noobs

	* ---- post-process the tex file ----
	* first pass removes the table breaks between the appended pieces,
	* second pass renames the column headers
	filefilter "$table/table2.tex" "$table/tmp.tex", ///
		from("\BShline\BShline\n\BSend{tabular}\n}\n{\n\BSdef\BSsym#1{\BSifmmode^{#1}\BSelse\BS(^{#1}\BS)\BSfi}\n\BSbegin{tabular}{l*{1}{cccccccc}}\n\BShline\BShline\n                    &\BSmulticolumn{8}{c}{}") ///
		to("") replace
	filefilter "$table/tmp.tex" "$table/table2.tex", ///
		from( "&       count&        mean&          sd&         min&         p10&         p50&         p90&         max\BS\BS" ) ///
		to("&       N&        Mean&          St. Dev.&         Min&         p10&         p50&         p90&         Max\BS\BS") replace

	rm "$table/tmp.tex"

end 

*===============================================================================
* One period regression specification
*===============================================================================
cap program drop pricereg
*------------------------------------------------------------------------------
* pricereg y1 y2 def fe goods iv dummy
*   One-period regression of delta_avgp on lagged price (avglp1) and the
*   intensity-by-lagged-price term, stored with eststo.
*   y1 y2  : years identifying the input file `y1'_`y2'_reg.dta in the
*            pricereg data folder
*   def    : intensity measure, x_rakuten | x_NS2004 | x_NS2009 | x_NS2014
*   fe     : "city_product" (city-year and product-year groups) | "product"
*   goods  : "tr" drops observations with TN_DW == 1
*   iv     : "ols" | "2sls" | "2sls_2iv"
*   dummy  : "d_1997" | "METI_size" | "METI_share"; defines the variable
*            `dummy' that is interacted with the regressors
*   Added to the stored estimates: iv, fx, pr, x_t, k, plus r_squared (OLS)
*   or fs (IV) and jstat/jpval (2sls_2iv).
*------------------------------------------------------------------------------
program define pricereg
args y1 y2 def fe goods iv dummy

	* Absorbed groups and the matching "Fixed Effects" label. The label is
	* derived from `fe' (as annual_reg does) instead of always reporting
	* "Product/City".
	if "`fe'" == "city_product"{
		local fixed_effect = "citydum_t product_t"
		local fn "Product/City"
	}
	if "`fe'" == "product"{
		local fixed_effect = "product_t"
		local fn "Product"
	}
	
	use "$Data/pricereg//`y1'_`y2'_reg.dta", clear
	drop if TN_DW == 1 & "`goods'" == "tr"

	egen citydum_t = group(cityid_RPS year2)
	egen product_t = group(RPS year2)
	
	egen city_gr=group(cityid_RPS)
	egen product_gr = group(RPS)
	
	* limit to products and cities that exist in all relevant years:
	* flag one qualifying row per city/product, then spread the flag to the
	* remaining rows of the same city/product (sorted so the flagged row is first)
	foreach year in `y2' {
		cap qui gen b_city_`year' = 1 if (year2 == `year')& avglp1 ~= .
		sort cityid_RPS b_city_`year'
		cap qui by cityid_RPS: replace b_city_`year' = b_city_`year'[_n-1] if missing(b_city_`year') & _n~= 1
		
		cap qui gen b_product_`year' = 1 if (year2 == `year') & avglp1 ~= .
		sort RPS b_product_`year'
		cap qui by RPS: replace b_product_`year' = b_product_`year'[_n-1] if missing(b_product_`year') & _n~= 1
	}
	
	ds b_product* b_city*
	foreach var in `r(varlist)'{
		qui keep if `var' == 1
	}
	
	* generate the interaction variable D (missing values are set to 0)
	if "`dummy'"=="d_1997"{
		qui gen dummy = 1 if year2>= 1997
		qui replace dummy = 0 if dummy == .
	}
	if "`dummy'"=="METI_size"{
		qui gen dummy = METI_log_size
		qui replace dummy = 0 if dummy == .
	}
	if "`dummy'"=="METI_share"{
		qui gen dummy = METI_share
		qui replace dummy = 0 if dummy == .
	}

	* intensity x lagged price (xlp), its interaction with D, and the year
	* reported as x_t
	if "`def'" ==  "x_rakuten" {
		gen xlp=xlp1_rakuten
		gen d_xlp1 = xlp*dummy
		local x_year "2010"
	}
	if "`def'" == "x_NS2004" {
		gen xlp=xlp1_NS2004
		gen d_xlp1 = xlp*dummy
		local x_year "2004"
	}
	if "`def'" == "x_NS2009" {
		gen xlp=xlp1_NS2009
		gen d_xlp1 = xlp*dummy
		local x_year "2009"
	}
	if "`def'" == "x_NS2014" {
		gen xlp=xlp1_NS2014
		gen d_xlp1 = xlp*dummy
		local x_year "2014"
	}
	
	gen xcatlp = xcatlp1_NS1999
	gen d_xcatlp1 = dummy*xcatlp1_NS1999
	gen d_p = dummy*avglp1

	* Periods ending in 1997 or later use the D-interacted regressor and
	* instruments; earlier periods use the un-interacted ones.
	* NOTE(review): xraklp and d_xraklp1 are not created in this program; the
	* 2sls_2iv case presumably expects them in the input file - confirm.
	if `y2' >= 1997 {
		local xvar  d_xlp1
		local z_cat d_xcatlp1
		local z_rak d_xraklp1
	}
	else {
		local xvar  xlp
		local z_cat xcatlp
		local z_rak xraklp
	}
	
	if "`iv'" == "ols" {
		eststo: reghdfe delta_avgp avglp1 `xvar', absorb(`fixed_effect', save) vce(cluster cityid_RPS RPS)
		estadd local iv "OLS"
		estadd scalar r_squared `e(r2)'
		estadd local fx "`fn'"
	}
	if "`iv'" == "2sls" {
		eststo: ivreghdfe delta_avgp avglp1 (`xvar'=`z_cat'), absorb(`fixed_effect') cluster(cityid_RPS RPS) savefirst first saverf
		estadd scalar fs = e(widstat)
		estadd local iv "IV"
		estadd local fx "`fn'"
	}
	if "`iv'" == "2sls_2iv" {
		eststo: ivreghdfe delta_avgp avglp1 (`xvar'=`z_cat' `z_rak'), absorb(`fixed_effect') cluster(cityid_RPS RPS) savefirst first saverf
		estadd scalar fs = e(widstat)
		estadd local iv "IV (Cat, Rak)"
		estadd scalar jstat = e(j)
		estadd scalar jpval = e(jp)
		estadd local fx "`fn'"
	}

	* statistics shared by all estimators: period label, intensity year and
	* horizon length
	estadd local pr "\{`y2'\}"
	estadd local "x_t" `x_year'
	estadd local k `=`y2'-`y1''
	
end

*==============================================================================
* Diff-in-Diff regression  specification
*===============================================================================
cap program drop reg_dind
*------------------------------------------------------------------------------
* reg_dind y1 y2 y3 y4 def fe goods iv dummy
*   Difference-in-differences regression on two stacked periods
*   (`y1'_`y2' and `y3'_`y4' files), stored with eststo.
*   Example: reg_dind 1991 1996 1996 2001 `def' "`fe'" tr `est' "`dummy'"
*   def    : x_rakuten | x_NS2004 | x_NS2009 | x_NS2014
*   fe     : "city_product" | "product"
*   goods  : accepted for a uniform call signature but currently unused (the
*            tradables filter below is commented out)
*   iv     : "ols" | "2sls" | "2sls_2iv"
*   dummy  : "d_1997" | "METI_size" | "METI_share"
*   Side effect: changes the working directory to the pricereg data folder.
*------------------------------------------------------------------------------
program define reg_dind
args y1 y2 y3 y4 def fe goods iv dummy

	* Absorbed groups and the matching "Fixed Effects" label (derived from
	* `fe', as in annual_reg, rather than hard-coded).
	if "`fe'" == "city_product"{
		local fixed_effect = "citydum_t product_t"
		local fn "Product/City"
	}
	if "`fe'" == "product"{
		local fixed_effect = "product_t"
		local fn "Product"
	}

	cd "$Data/pricereg/"
	
	use "`y1'_`y2'_reg.dta", clear
	append using "`y3'_`y4'_reg.dta"

	* keep tradables (disabled)
	*drop if TN_DW == 1 & "`goods'" == "tr"

	* generate variable for fixed effects that vary for each year
	egen citydum_t = group(cityid_RPS year2)
	egen product_t = group(RPS year2)
			
	* find the cities and products observed (non-missing avglp1) in each of
	* the two end years `y2' and `y4'
	forvalues i = 2(2)4 {
		cap qui gen b_city_`y`i'' = 1 if (year2 == `y`i'')& avglp1 ~= .
		sort cityid_RPS b_city_`y`i''
		cap qui by cityid_RPS: replace b_city_`y`i'' = b_city_`y`i''[_n-1] if missing(b_city_`y`i'') & _n~= 1
		
		cap qui gen b_product_`y`i'' = 1 if (year2 == `y`i'') & avglp1 ~= .
		sort RPS b_product_`y`i''
		cap qui by RPS: replace b_product_`y`i'' = b_product_`y`i''[_n-1] if missing(b_product_`y`i'') & _n~= 1
	}	
	
	* keep only products and cities flagged in both end years
	ds b_product* b_city*
	foreach var in `r(varlist)'{
		qui keep if `var' == 1
	}
	
	* generate D (missing values set to 0); D*xlp is built below
	if "`dummy'"=="d_1997"{
		qui gen dummy = 1 if year2>= 1997
		qui replace dummy = 0 if dummy == .
	}
	if "`dummy'"=="METI_size"{
		qui gen dummy = METI_log_size
		qui replace dummy = 0 if dummy == .
	}
	if "`dummy'"=="METI_share"{
		qui gen dummy = METI_share
		qui replace dummy = 0 if dummy == .
	}
	
	if "`def'" ==  "x_rakuten" {
		gen xlp=xlp1_rakuten
		gen d_xlp1 = xlp*dummy
		local x_year "2010"
	}
	if "`def'" == "x_NS2004" {
		gen xlp=xlp1_NS2004
		gen d_xlp1 = xlp*dummy
		local x_year "2004"
	}
	if "`def'" == "x_NS2009" {
		gen xlp=xlp1_NS2009
		gen d_xlp1 = xlp*dummy
		local x_year "2009"
	}
	if "`def'" == "x_NS2014" {
		gen xlp=xlp1_NS2014
		gen d_xlp1 = xlp*dummy
		local x_year "2014"
	}
	
	gen xcatlp = xcatlp1_NS1999
	gen d_xcatlp1 = dummy*xcatlp1_NS1999
	gen d_p = dummy*avglp1
	
	
	* runs difference in difference specification
	if "`iv'" == "ols" {
		eststo: reghdfe delta_avgp avglp1 xlp d_xlp1 d_p, absorb(`fixed_effect', save) vce(cluster cityid_RPS RPS)
		estadd scalar r_squared `e(r2)'
		* add statistics to output
		estadd local pr "\{`y2',`y4'\}"
		estadd local "x_t" `x_year'
		estadd local k `=`y2'-`y1''
		estadd local fx "`fn'"
	
	}
	if "`iv'" == "2sls" {
		eststo: ivreghdfe delta_avgp avglp1 d_p (xlp d_xlp1 = xcatlp d_xcatlp1), absorb(`fixed_effect') cluster(cityid_RPS RPS)  savefirst first saverf savefprefix(col3_st1)
		estadd scalar fs = e(widstat)
		estadd local iv "IV"
		* period label on the two saved first-stage results
		estadd local pr "\{`y2',`y4'\}": col3_st1xlp
		estadd local pr "\{`y2',`y4'\}": col3_st1d_xlp1 
		* add statistics to output
		estadd local pr "\{`y2',`y4'\}"
		estadd local "x_t" `x_year'
		estadd local k `=`y2'-`y1''
		estadd local fx "`fn'"
	}
	if "`iv'" == "2sls_2iv" {
		* NOTE(review): xraklp and d_xraklp1 are not created in this program;
		* presumably they are expected in the input files - confirm.
		eststo: ivreghdfe delta_avgp avglp1 d_p (xlp d_xlp1 = xcatlp xraklp d_xcatlp1 d_xraklp1), absorb(`fixed_effect') cluster(cityid_RPS RPS)  savefirst first saverf savefprefix(col3_st1)
		estadd scalar fs = e(widstat)
		estadd local iv "IV (Cat, Rak)"
		estadd scalar jstat = e(j)
		estadd scalar jpval = e(jp)
		
		* elements [4,1] and [4,2] of e(first) are attached to the two
		* first-stage results as fst1
		mat a = e(first)
		estadd scalar fst1 = a[4,1]: col3_st1xlp
		estadd scalar fst1 = a[4,2]: col3_st1d_xlp1 
		estadd scalar fs = e(widstat): col3_st1xlp
		estadd local iv "IV (Cat, Rak)": col3_st1xlp
		estadd scalar fs = e(widstat): col3_st1d_xlp1 
		estadd local iv "IV (Cat, Rak)": col3_st1d_xlp1 
		
		* add statistics to output
		estadd local pr "\{`y2',`y4'\}"
		estadd local "x_t" `x_year'
		estadd local k `=`y2'-`y1''
		* stored as fx (was tfx), the name the other branches and the
		* table's stats() list use
		estadd local fx "`fn'"
	
	}
	

end

*=================================================================================
* Diff-in-Diff annual regression specification
*===============================================================================
cap program drop annual_reg
*------------------------------------------------------------------------------
* annual_reg yr2_st yr2_end def fe goods iv dummy
*   Pools the one-year files whose end year runs from yr2_st to yr2_end and
*   estimates the difference-in-differences specification on the pooled panel.
*   def   : x_rakuten | x_NS2004 | x_NS2009 | x_NS2014
*   fe    : "city_product" | "product"
*   goods : "tr" drops observations with TN_DW == 1
*   iv    : "ols" | "2sls"
*   dummy : "d_1997" | "METI_size" | "METI_share"
*------------------------------------------------------------------------------
program define annual_reg
args yr2_st yr2_end def fe goods iv dummy

	* absorbed groups and the label shown in the "Fixed Effects" row
	local fixed_effect ""
	local fn ""
	if "`fe'" == "city_product" {
		local fixed_effect citydum_t product_t
		local fn Product/City
	}
	else if "`fe'" == "product" {
		local fixed_effect product_t
		local fn Product
	}

	* stack the annual files (yr2_st-1 -> yr2_st) ... (yr2_end-1 -> yr2_end)
	local base_year  = `yr2_st' - 1
	local last_start = `yr2_end' - 1
	qui use "$Data/pricereg//`base_year'_`yr2_st'_reg.dta", clear
	forvalues y_start = `yr2_st'/`last_start' {
		local y_stop = `y_start' + 1
		qui append using "$Data/pricereg//`y_start'_`y_stop'_reg.dta"
	}

	qui drop if "`goods'" == "tr" & TN_DW == 1

	qui egen citydum_t = group(cityid_RPS year2)
	qui egen product_t = group(RPS year2)

	* balanced panel: for every year, flag the cities and then the products
	* that have a non-missing lagged price, spreading the flag to all of their rows
	forvalues yr = `yr2_st'/`yr2_end' {
		foreach dim in city product {
			if "`dim'" == "city" {
				local unit cityid_RPS
			}
			else {
				local unit RPS
			}
			local flag b_`dim'_`yr'
			cap qui gen `flag' = 1 if (year2 == `yr') & avglp1 ~= .
			sort `unit' `flag'
			cap qui by `unit': replace `flag' = `flag'[_n-1] if missing(`flag') & _n ~= 1
		}
	}

	* keep rows whose city and product are flagged in every year
	ds b_product* b_city*
	foreach flagvar in `r(varlist)' {
		qui keep if `flagvar' == 1
	}

	* interaction variable D; missing values become 0
	local dummy_src ""
	if "`dummy'" == "METI_size"  local dummy_src METI_log_size
	if "`dummy'" == "METI_share" local dummy_src METI_share
	if "`dummy'" == "d_1997" {
		qui gen dummy = (year2 >= 1997)
	}
	else if "`dummy_src'" != "" {
		qui gen dummy = `dummy_src'
		qui replace dummy = 0 if dummy == .
	}

	* intensity x lagged price, its D interaction, and the reported intensity year
	local x_year ""
	if "`def'" == "x_rakuten" local x_year 2010
	if inlist("`def'", "x_NS2004", "x_NS2009", "x_NS2014") local x_year = substr("`def'", 5, 4)
	if "`x_year'" != "" {
		local xlp_src = "xlp1_" + substr("`def'", 3, .)
		qui gen xlp = `xlp_src'
		qui gen d_xlp1 = xlp*dummy
	}

	qui gen xcatlp = xcatlp1_NS1999
	qui gen d_xcatlp1 = dummy*xcatlp1_NS1999
	qui gen d_p = dummy*avglp1

	* difference-in-differences estimation
	if "`iv'" == "ols" {
		eststo: reghdfe delta_avgp avglp1 xlp d_xlp1 d_p, ///
			absorb(`fixed_effect', save) vce(cluster cityid_RPS RPS) resid
		estadd scalar r_squared `e(r2)'
		estadd local pr "Annual"
		estadd local "x_t" `x_year'
		estadd local fx "`fn'"
	}
	if "`iv'" == "2sls" {
		eststo: ivreghdfe delta_avgp avglp1 d_p (xlp d_xlp1 = xcatlp d_xcatlp1), ///
			absorb(`fixed_effect', save resid(residuals)) cluster(cityid_RPS RPS) savefirst first saverf
		mat fstat = e(first)
		estadd scalar fs = e(widstat)
		estadd local iv "IV"
		estadd local pr "Annual"
		estadd local pr2 "`yr2_st'-`yr2_end'"
		estadd local "x_t" `x_year'
		estadd local fx "`fn'"
	}

end

*===============================================================================
* DinD annual regression discrete
*===============================================================================
cap program drop annual_reg_discrete
*------------------------------------------------------------------------------
* annual_reg_discrete yr2_st yr2_end def fe goods iv bins dummy
*   Annual difference-in-differences regression in which the intensity
*   measure enters as an above-median indicator instead of continuously.
*   def   : x_rakuten | x_NS2004 | x_NS2009 | x_NS2014
*   fe    : "city_product" | "product"
*   goods : "tr" drops observations with TN_DW == 1
*   iv    : "ols" | "2sls"
*   bins  : only "I2" (two bins split at the median) triggers an estimation
*   dummy : "d_1997" | "METI_size" | "METI_share"
*------------------------------------------------------------------------------
program define annual_reg_discrete
args yr2_st yr2_end def fe goods iv bins dummy

	local fixed_effect ""
	if "`fe'" == "city_product" {
		local fixed_effect citydum_t product_t
	}
	else if "`fe'" == "product" {
		local fixed_effect product_t
	}

	* stack the annual files (yr2_st-1 -> yr2_st) ... (yr2_end-1 -> yr2_end)
	local base_year  = `yr2_st' - 1
	local last_start = `yr2_end' - 1
	use "$Data/pricereg//`base_year'_`yr2_st'_reg.dta", clear
	forvalues y_start = `yr2_st'/`last_start' {
		local y_stop = `y_start' + 1
		qui append using "$Data/pricereg//`y_start'_`y_stop'_reg.dta"
	}

	drop if "`goods'" == "tr" & TN_DW == 1

	egen citydum_t = group(cityid_RPS year2)
	egen product_t = group(RPS year2)

	* for every year, flag the cities and then the products with a non-missing
	* lagged price, spreading the flag to all of their rows
	forvalues yr = `yr2_st'/`yr2_end' {
		foreach dim in city product {
			if "`dim'" == "city" {
				local unit cityid_RPS
			}
			else {
				local unit RPS
			}
			local flag b_`dim'_`yr'
			cap qui gen `flag' = 1 if (year2 == `yr') & avglp1 ~= .
			sort `unit' `flag'
			cap qui by `unit': replace `flag' = `flag'[_n-1] if missing(`flag') & _n ~= 1
		}
	}

	* keep rows whose city and product are flagged in every year
	ds b_product* b_city*
	foreach flagvar in `r(varlist)' {
		qui keep if `flagvar' == 1
	}

	* interaction variable D; missing values become 0
	local dummy_src ""
	if "`dummy'" == "METI_size"  local dummy_src METI_log_size
	if "`dummy'" == "METI_share" local dummy_src METI_share
	if "`dummy'" == "d_1997" {
		qui gen dummy = (year2 >= 1997)
	}
	else if "`dummy_src'" != "" {
		qui gen dummy = `dummy_src'
		qui replace dummy = 0 if dummy == .
	}

	* continuous terms: not used in the regressions of this program, but left
	* in the data in memory
	local x_year ""
	if "`def'" == "x_rakuten" local x_year 2010
	if inlist("`def'", "x_NS2004", "x_NS2009", "x_NS2014") local x_year = substr("`def'", 5, 4)
	if "`x_year'" != "" {
		local xlp_src = "xlp1_" + substr("`def'", 3, .)
		gen xlp = `xlp_src'
		gen d_xlp1 = xlp*dummy
	}

	* percentiles of the intensity measure, one observation per product
	bys RPS: gen first_x = `def' if _n == 1
	sum first_x, d
	foreach pct of numlist 50 33 66 {
		egen x`pct' = pctile(first_x), p(`pct')
	}

	* same for the 1999 catalog measure
	bys RPS: gen first_xcat = xcat_NS1999 if _n == 1
	sum first_xcat, d
	foreach pct of numlist 50 33 66 {
		egen xcat`pct' = pctile(first_xcat), p(`pct')
	}

	* 2 bins (0-50) (50-100): 1 inside the bin, 0 outside it, missing when
	* the underlying measure is missing
	gen I21 = cond(`def' <= x50, 1, cond(!missing(`def'), 0, .))
	gen I22 = cond(`def' > x50 & `def' ~= ., 1, cond(!missing(`def'), 0, .))

	gen Ixcat21 = cond(xcat_NS1999 <= xcat50, 1, cond(!missing(xcat_NS1999), 0, .))
	gen Ixcat22 = cond(xcat_NS1999 > xcat50 & xcat_NS1999 ~= ., 1, cond(!missing(xcat_NS1999), 0, .))

	* bin x lagged price and bin x lagged price x D
	foreach bin in I21 I22 Ixcat21 Ixcat22 {
		gen `bin'lp = `bin'*avglp1
		gen d_`bin'lp = dummy*`bin'*avglp1
		label var `bin'lp "`bin' \$\times$ Lagged Price"
		label var d_`bin'lp "`bin' \$\times$ Lagged Price \$\times$ \$ D_{t}$"
	}

	gen d_p = dummy* avglp1

	* difference-in-differences estimation (two-bin version only)
	if "`iv'" == "ols" & "`bins'" == "I2" {
		eststo: reghdfe delta_avgp avglp1 d_p I22lp d_I22lp, ///
			absorb(`fixed_effect') vce(cluster cityid_RPS RPS) nocon
		estadd local pr "Annual"
		estadd local "x_t" `x_year'
		estadd scalar r_squared `e(r2)'
	}

	if "`iv'" == "2sls" & "`bins'" == "I2" {
		eststo: ivreghdfe delta_avgp avglp1 d_p (I22lp d_I22lp = Ixcat22lp d_Ixcat22lp), ///
			absorb(`fixed_effect') cluster(cityid_RPS RPS) savefirst first saverf nocon
		estadd local iv "IV"
		estadd local pr "Annual"
		estadd local "x_t" `x_year'
		estadd scalar fs = e(widstat)
	}

end

*=================================================================================
* DinD specification: main results * Table 6
*===============================================================================
cap program drop reg_dind_base
*------------------------------------------------------------------------------
* reg_dind_base def dummy est no
*   Runs the five columns of the main difference-in-differences table and
*   writes them to table`no'.tex in the $table folder.
*   def   : intensity measure passed on to the estimation programs
*   dummy : "d_1997" | "METI_size" (| "METI_share", which gets no D labels here)
*   est   : "ols" | "2sls" (selects the estimator and the table footer)
*   no    : suffix of the output file name
*------------------------------------------------------------------------------
program define reg_dind_base
args def dummy est no

	est clear

	* columns 1-4: city and product fixed effects
	local fe "city_product"
	pricereg 1991 1996 `def' "`fe'" tr `est' "`dummy'"              /*column1 of table 6*/
	pricereg 1996 2001 `def' "`fe'" tr `est' "`dummy'"              /*column2 of table 6*/
	reg_dind 1991 1996 1996 2001 `def' "`fe'" tr `est' "`dummy'"    /*column3 of table 6*/
	annual_reg 1992 2001 `def' "`fe'" tr `est' "`dummy'"            /*column4 of table 6*/

	* column 5: product fixed effects only
	local fe "product"
	annual_reg 1992 2001 `def' "`fe'" tr `est' "`dummy'"            /*column5 of table 6*/

	* labels are taken from the data left in memory by the last estimation
	label var delta_avgp  "\$\Delta$ \$ p_{ict}$"
	label var avglp1 "Lagged Price "
	label var xlp "E-Commerce Intensity \$\times$ Lagged Price "

	if "`dummy'"=="d_1997" {
		label var d_xlp1 "\$ D_{t}$ \$\times$ E-Commerce Intensity \$\times$ Lagged Price"
		label var d_p "\$ D_{t}$ \$\times$ Lagged Price "
	}
	if "`dummy'"=="METI_size" {
		label var d_xlp1 "EC Market Size \$\times$ E-Commerce Intensity \$\times$ Lagged Price"
		label var d_p "EC Market Size \$\times$ Lagged Price "
	}

	* file-name prefix by fixed-effect choice (not used further down)
	local suff ""
	if "`fe'" == "product" {
		local suff "pd_"
	}

	* write the table; the footer depends on the estimator
	if "`est'" == "ols" {
		esttab using "$table/table`no'.tex", ///
			order(avglp1 xlp d_p d_xlp1) keep(avglp1 xlp d_xlp1 d_p) ///
			b(3) se(3) ar2(3) ///
			nolegend nonotes star(* 0.10 ** 0.05 *** 0.01) replace label wrap substitute(\_ _) ///
			stats(fx pr N r_squared, labels("Fixed Effects" "\$t$" "Observations" "\$R^{2}$") fmt(%50 %50 %9.0fc %9.2fc))
	}
	else if "`est'" == "2sls" {
		* NOTE(review): the second fmt() entry reads s%50, unlike the %50
		* used in the OLS footer - looks like a typo; left unchanged.
		esttab using "$table/table`no'.tex", ///
			order(avglp1 xlp d_p d_xlp1) keep(avglp1 xlp d_xlp1 d_p) ///
			b(3) se(3) ar2(3) ///
			nolegend nonotes star(* 0.10 ** 0.05 *** 0.01) replace label wrap substitute(\_ _) ///
			stats(fx pr N fs iv, labels("Fixed Effects" "\$t$" "Observations" "First-stage F" "Estimation") fmt(%50 s%50 %9.0fc %9.2fc %50))
	}

	* post-process the LaTeX file, alternating between the table and a
	* scratch copy so every pass reads the previous pass's output
	local tex "$table/table`no'.tex"
	local tmp "$table/tmp.tex"

	filefilter "`tex'" "`tmp'", from ("Annual\BS\BS") to ("Annual\BS\BS\n&&&&1991-2001\BS\BS") replace
	filefilter "`tmp'" "`tex'", from ("                    &\BSmulticolumn{1}{c}{$\BSDelta p_{ict}$}") to ("Dependent Variable&\BSmulticolumn{1}{c}{$\BSDelta p_{ict}$}") replace
	filefilter "`tex'" "`tmp'", from ("\BSbegin{tabular}") to ("\BSadjustbox{max width=\BStextwidth}{\n\BSbegin{tabular}") replace
	filefilter "`tmp'" "`tex'", from ("\BSend{tabular}") to ("\BSend{tabular}\n}") replace
	filefilter "`tex'" "`tmp'", from ("EC Market Size") to ("Log E-Commerce Market Size") replace
	filefilter "`tmp'" "`tex'", replace

	rm "$table/tmp.tex"

end


*==============================================================================
* DinD specification: extended periods with discrete regression * Table 8
*===============================================================================
cap program drop reg_dind_extendperiod
*------------------------------------------------------------------------------
* reg_dind_extendperiod def dummy est no
*   Annual regressions for 1992-2016, continuous and discrete (two-bin)
*   versions, written to table`no'.tex in the $table folder.
*   def   : "x_rakuten" runs that single measure; any other value runs the
*           three NSFIE measures x_NS2004, x_NS2009 and x_NS2014
*   dummy : passed on to the estimation programs
*   est   : "ols" | "2sls"
*   no    : suffix of the output file name; replaced by _a4 when est is "ols"
*------------------------------------------------------------------------------
program define reg_dind_extendperiod
args def dummy est no

	local fe "city_product"

	* bookkeeping locals (none of them is referenced further down)
	local fixed_effect ""
	if "`fe'" == "city_product" {
		local fixed_effect citydum_t product_t
	}
	else if "`fe'" == "product" {
		local fixed_effect product_t
	}
	local suff2 ""
	if "`dummy'"=="METI_size"  local suff2 "Msize_"
	if "`dummy'"=="METI_share" local suff2 "Mshare_"

	est clear

	* intensity measures to run, in column order
	if "`def'" == "x_rakuten" {
		local measures `def'
	}
	else {
		local measures x_NS2004 x_NS2009 x_NS2014
	}

	* continuous annual regressions (first group of columns)
	foreach m of local measures {
		annual_reg 1992 2016 `m' "`fe'" tr `est' "`dummy'"
	}

	* discrete annual regressions (second group of columns)
	foreach m of local measures {
		annual_reg_discrete 1992 2016 `m' "`fe'" tr `est' I2 "`dummy'"
	}

	* label variables (applied to the data left in memory by the last run)
	label var delta_avgp  "\$\Delta$ \$ p_{ict}$"
	label var avglp1 "Lagged Price"
	label var xlp "E-Commerce Intensity \$\times$ Lagged Price"
	label var d_xlp1 "\$ D_{t}$ \$\times$ E-Commerce Intensity \$\times$ Lagged Price"
	label var d_p "\$ D_{t}$ \$\times$ Lagged Price "
	label var I21lp "Low E-Commerce Intensity \$\times$ Lagged Price"
	label var I22lp "High E-Commerce Intensity \$\times$ Lagged Price"
	label var d_I21lp " \$ D_{t}$ \$\times$ Low E-Commerce Intensity \$\times$ Lagged Price"
	label var d_I22lp "\$ D_{t}$ \$\times$ High E-Commerce Intensity \$\times$ Lagged Price"

	local suff ""
	if "`fe'" == "product" {
		local suff "pd_"
	}

	* write the table; the OLS version always goes to table_a4.tex
	if "`est'" == "ols" {
		local no _a4
		esttab using "$table/table`no'.tex", ///
			order(avglp1 xlp d_p d_xlp1 I2* d_I2*) keep(avglp1 xlp d_xlp1 d_p I2* d_I2*) ///
			b(3) se(3) ar2(3) ///
			nolegend nonotes star(* 0.10 ** 0.05 *** 0.01) replace label wrap substitute(\_ _) ///
			stats(pr N x_t r_squared, labels("\$t$" "Observations" "E-Commerce Intensity Year" "\$R^{2}$") fmt(%50 %9.0fc %50 %9.2fc))
	}
	if "`est'" == "2sls" {
		esttab using "$table/table`no'.tex", ///
			order(avglp1 xlp d_p d_xlp1 I2* d_I2*) keep(avglp1 xlp d_xlp1 d_p I2* d_I2*) ///
			b(3) se(3) ar2(3) ///
			nolegend nonotes star(* 0.10 ** 0.05 *** 0.01) replace label wrap substitute(\_ _) ///
			stats(pr N x_t fs iv, labels("\$t$" "Observations" "E-Commerce Intensity Year" "First-stage F" "Estimation") fmt(%50 %9.0fc %50 %9.2fc %50))
	}

	* post-process the LaTeX file, alternating between the table and a
	* scratch copy so every pass reads the previous pass's output
	local tex "$table/table`no'.tex"
	local tmp "$table/tmp.tex"

	filefilter "`tex'" "`tmp'", from ("                    &\BSmulticolumn{1}{c}{$\BSDelta p_{ict}$}") to ("Dependent Variable&\BSmulticolumn{1}{c}{$\BSDelta p_{ict}$}") replace
	filefilter "`tmp'" "`tex'", from ("Annual&      Annual&      Annual&      Annual&      Annual&      Annual\BS\BS") to ("\BSmulticolumn{6}{c}{Annual}\BS\BS\n&\BSmulticolumn{6}{c}{1991-2016}\BS\BS") replace
	filefilter "`tex'" "`tmp'", from ("\BSbegin{tabular}") to ("\BSadjustbox{max width=\BStextwidth}{\n\BSbegin{tabular}") replace
	filefilter "`tmp'" "`tex'", from ("\BSend{tabular}") to ("\BSend{tabular}\n}") replace

	rm "$table/tmp.tex"

end

*==============================================================================
* DinD specification scatter plots
cap program drop reg_dind_scatter
*------------------------------------------------------------------------------
* reg_dind_scatter y1 y2 def
*   Three scatter plots of residualized price change against residualized
*   lagged price for the period `y1'-`y2': all products, products below the
*   25th percentile of `def', and products above the 75th percentile.
*   y1 y2 : years identifying the input file `y1'_`y2'_reg.dta
*   def   : intensity measure used for the quartile split
*   Graphs are kept in memory as scatter_adj_p_`y1'_`y2',
*   scatter_adj_p_smallx_`y1'_`y2' and scatter_adj_p_bigx_`y1'_`y2'.
*   Side effect: changes the working directory to the pricereg data folder.
*------------------------------------------------------------------------------
program define reg_dind_scatter
args y1 y2 def
	
	use "$Data/est_ready.dta", clear
	keep if TN_DW == 0
	
	* identify quartile cut-offs on one row per distinct combination of the
	* kept variables. `def' is added to the keep list so that measures other
	* than the three NSFIE ones (e.g. x_rakuten) survive; for the NSFIE
	* measures the list is unchanged.
	local xvars x_NS2004 x_NS2009 x_NS2014 `def'
	local xvars : list uniq xvars
	keep RPS `xvars'
	gduplicates drop
	
	sum `def', d
	local topq `r(p75)'
	local botq `r(p25)'
	
	cd "$Data/pricereg"
	use "`y1'_`y2'_reg.dta", clear
	
	* keep tradables
	keep if TN_DW == 0

	* groups used as fixed effects (built on year1 here)
	egen citydum_t = group(cityid_RPS year1)
	egen product_t = group(RPS year1)
	
	* all products
	_reg_dind_scatter_panel "All Products `y1'-`y2'" scatter_adj_p_`y1'_`y2' quiet
	
	* bottom quartile of the intensity measure
	preserve
		keep if `def' < `botq'
		_reg_dind_scatter_panel "Bottom Quartile of E-Commerce Intensity `y1'-`y2'" scatter_adj_p_smallx_`y1'_`y2' show
	restore
	
	* top quartile of the intensity measure
	preserve
		keep if `def' > `topq' & `def' ~= .
		_reg_dind_scatter_panel "Top Quartile of E-Commerce Intensity `y1'-`y2'" scatter_adj_p_bigx_`y1'_`y2' show
	restore
end

cap program drop _reg_dind_scatter_panel
*------------------------------------------------------------------------------
* _reg_dind_scatter_panel panel_title graph_name fit_output
*   Private helper of reg_dind_scatter: on the data in memory, residualizes
*   delta_avgp and avglp1 on the citydum_t and product_t indicators, fits the
*   residual-on-residual line and draws one scatter panel.
*   panel_title : graph title
*   graph_name  : name under which the graph is stored (replaced if present)
*   fit_output  : "show" prints the final regression, anything else runs it quietly
*------------------------------------------------------------------------------
program define _reg_dind_scatter_panel
args panel_title graph_name fit_output

	* remove the working variables of a previous panel, if there are any
	cap drop n_avglp1_adj fit_delta_avgp delta_avgp_* avglp1_*

	qui reg delta_avgp i.citydum_t i.product_t
	predict delta_avgp_p, xb
	gen delta_avgp_adj = delta_avgp - delta_avgp_p
	
	qui reg avglp1 i.citydum_t i.product_t
	predict avglp1_p, xb
	gen avglp1_adj = avglp1 - avglp1_p
	
	if "`fit_output'" == "show" {
		reg delta_avgp_adj avglp1_adj
	}
	else {
		qui reg delta_avgp_adj avglp1_adj
	}
	local slope : display %9.3f `=_b[avglp1_adj]'
	local se : display %9.3f `=_se[avglp1_adj]'  
	local rsquared : display %9.3f `e(r2)'
	predict fit_delta_avgp, xb
	
	* reference line with slope -1, drawn in black behind the fitted line
	gen n_avglp1_adj = -avglp1_adj

	twoway (scatter delta_avgp_adj avglp1_adj, mcolor(gs8) msize(vsmall) ylabel(, format(%9.1f) angle(0))) ///
	(scatter n_avglp1_adj avglp1_adj , c(l) msymbol(i) lcolor(black) lwidth(thick) sort(avglp1_adj)) ///
	(scatter fit_delta_avgp avglp1_adj , c(l) msymbol(i) lcolor(gs1) lwidth(thick) lpattern(dash) sort(avglp1_adj)) ///
	, ///
	title("`panel_title'", size(large)) ///
	ytitle("Normalized Price Change", size(medium)) ///
	xtitle("Normalized Price", size(medium)) ///
	graphregion(color(white)) legend(lab(2 "45 degree line") lab(3 "Fitted values") order(2 3) size(medsmall)) ///
	note("Slope:`slope'; SE:`se'; R{superscript:2}:`rsquared'.", size(med)) ///
	name(`graph_name', replace)

end

*===============================================================================
* Relative Price Change* Table 3,4, and 5
*===============================================================================
cap program drop relative_price_change
*------------------------------------------------------------------------------
* relative_price_change def dummy no
*   Relative price change regressions: delta_avgp on D and D x intensity with
*   product fixed effects, OLS and IV (D x catalog intensity as instrument),
*   for the 1992-2001 and the 1992-2016 samples, plus a product-level
*   regression of the intensity measure on the catalog measure.
*   def   : intensity measure (x_rakuten or x_NSyyyy)
*   dummy : "d_1997" | "METI_size" | "METI_share"
*   no    : suffix of the main table file table`no'.tex. When dummy is
*           "d_1997" a first-stage table is also written: table5.tex for
*           no == "3", table_a6.tex for no == "_a5".
*           NOTE(review): for any other `no' the first-stage file name
*           collapses to table.tex - confirm this is never the case.
*------------------------------------------------------------------------------
program define relative_price_change
args def dummy no
	
	est clear
	
	* year of the intensity measure, reported as x_t
	if "`def'" == "x_rakuten"{
		local x_year 2010
	}
	else{
		local x_year `=substr("`def'",-4,4)'
	}
	
	* Sample 1: 1992-2001, sample 2: 1992-2016 (tradable goods). Both samples
	* share this one code path, so the METI_share case is handled in both.
	foreach s in 1 2 {
		if `s' == 1 {
			local last 2001
			local save_opt ""
		}
		else {
			local last 2016
			* the long-sample OLS keeps its original absorb(..., save) option
			local save_opt ", save"
		}
		local last_start = `last' - 1
		
		use "$Data/pricereg/1991_1992_reg.dta", clear
		forvalues y1 = 1992/`last_start' {
			qui append using "$Data/pricereg//`y1'_`=`y1'+1'_reg.dta"
		}
		
		drop if TN_DW == 1

		* limit to products and cities that exist in all relevant years
		forvalues yr = 1992/`last' {
			cap qui gen b_city_`yr' = 1 if (year2 == `yr')& avglp1 ~= .
			sort cityid_RPS b_city_`yr'
			cap qui by cityid_RPS: replace b_city_`yr' = b_city_`yr'[_n-1] if missing(b_city_`yr') & _n~= 1
			
			cap qui gen b_product_`yr' = 1 if (year2 == `yr') & avglp1 ~= .
			sort RPS b_product_`yr'
			cap qui by RPS: replace b_product_`yr' = b_product_`yr'[_n-1] if missing(b_product_`yr') & _n~= 1
		}

		ds b_product* b_city*
		foreach var in `r(varlist)'{
			qui keep if `var' == 1
		}
		drop b_product* b_city*

		* generate dummy and fixed effect groups
		if "`dummy'"=="d_1997"{
			qui gen dummy = 1 if year2>= 1997
			qui replace dummy = 0 if dummy == .
		}
		if "`dummy'"=="METI_size"{
			qui gen dummy = METI_log_size
			qui replace dummy = 0 if dummy == .
		}
		if "`dummy'"=="METI_share"{
			qui gen dummy = METI_share
			qui replace dummy = 0 if dummy == .
		}
		egen city_gr=group(cityid_RPS)  
		egen product_gr = group(RPS)
		
		gen d_x = dummy*`def'
		gen d_xcat = dummy*xcat_NS1999
		gen d_p = dummy*avglp1
		
		* OLS, product fixed effects
		reghdfe delta_avgp dummy d_x, absorb(product_gr`save_opt') vce(cluster cityid_RPS RPS)
		eststo inflation_`s'_a
		estadd local fe "Product"
		estadd local sample "Goods"
		estadd local x_t "`x_year'"
		estadd local t "1992-`last'"
		estadd local twosls "OLS"
		estadd scalar r_squared `e(r2)'
		
		* 2SLS, product fixed effects; the first stage is saved under the
		* prefix inflation_`s'_b_st1
		eststo: ivreghdfe delta_avgp dummy (d_x=d_xcat), absorb(product_gr) cluster(cityid_RPS RPS) savefirst first savefprefix(inflation_`s'_b_st1)
		eststo inflation_`s'_b
		estadd local fe "Product": inflation_`s'_b*
		estadd local sample "Goods": inflation_`s'_b*
		estadd local x_t "`x_year'": inflation_`s'_b*
		estadd local t "1992-`last'": inflation_`s'_b*
		local fst = e(widstat)
		estadd scalar fs = `fst': inflation_`s'_b*
		estadd scalar f1s = `fst': inflation_`s'_b_st1*
		estadd local twosls "IV"
		estadd local twosls "IV-First Stage": inflation_`s'_b_st1*
		* element [2,1] of e(first) is reported as the first-stage R-squared
		mat fstat = e(first)
		local pr2 fstat[2,1]
		local r_squared : display %9.2f `pr2'
		estadd local r_squared `r_squared': inflation_`s'_b_st1*
	}
	
	* Cross section regression: one row per product
	use "$Data/pricereg//2008_2009_reg.dta", clear
	keep `def' xcat_NS1999 RPS
	drop if `def' == . | xcat_NS1999 == .
	duplicates drop RPS, force
	reg `def' xcat_NS1999
	eststo inflation_3
	estadd local fe "None"
	estadd local sample "Goods"
	estadd local x_t "`x_year'"
	estadd local t ""
	estadd local twosls "OLS"
	estadd scalar r_squared `e(r2)'
	
	* placeholder variables so that the labels below can be attached
	foreach var in dummy d_x delta_avgp d_xcat d_p {
		qui gen `var' = .
	}
	
	
	if "`dummy'"=="d_1997" {
		
		label var dummy "\$D_{t}$"
		label var d_x "E-Commerce Intensity \$\times$ \$ D_{t}$"
		label var d_p "Lagged Price \$\times$ \$ D_{t}$"
		label var d_xcat "Catalog Intensity \$\times$ \$ D_{t}$"
	}
	
	if "`dummy'"=="METI_size" {
		
		label var dummy "EC Market Size"
		label var d_x "E-Commerce Intensity \$\times$ EC Market Size"
		label var d_p "Lagged Price \$\times$ EC Market Size"
		label var d_xcat "Catalog Intensity \$\times$ EC Market Size"
	}
	
	label var delta_avgp  "\$\Delta$ $ p_{ict}$"
	label var `def' "E-Commerce Intensity"
	label var xcat_NS1999 "Catalog Intensity"
	
	* main table: OLS columns first, then IV columns
	esttab inflation_1_a inflation_2_a inflation_1_b inflation_2_b ///
		using "$table/table`no'.tex", ///
		keep(dummy d_x) order(dummy d_x) b(4) se(4) ar2(a3) ///
		nolegend nonotes star(* 0.10 ** 0.05 *** 0.01) replace label wrap substitute(\_ _) ///
		stats(fe t N r_squared fs twosls, ///
		labels("Fixed Effects" "Estimation Period" "Observations" "\$R^{2}$" "First-Stage F-Stat" "Estimation Method") ///
		fmt(%50 %50 %9.0fc %9.2fc %9.2fc %50))
	
	* post-process the LaTeX file, alternating between the table and a
	* scratch copy
	local ab filefilter "$table/table`no'.tex" "$table/tmp.tex"
	local ba filefilter "$table/tmp.tex" "$table/table`no'.tex"
	
	`ab', from ("\BSbegin{tabular}") to ("\BSadjustbox{max width=\BStextwidth}{\n\BSbegin{tabular}") replace
	`ba', from ("\BSend{tabular}") to ("\BSend{tabular}\n}") replace
	`ab', from ("times&") to ("times\$&") replace
	`ba', from ("\nD_") to ("\n\$D_}") replace
	`ab', from ("EC Market Size") to ("Log E-Commerce Market Size") replace
	`ba', replace 	
	rm "$table/tmp.tex"
	
	* first-stage table (only for the D_t specification)
	if "`dummy'"=="d_1997" {
	
		if "`no'" == "3" {
			local nof 5
		}
		if "`no'" == "_a5" {
			local nof _a6
		}
		
		esttab inflation_1_b_st1* inflation_2_b_st1* inflation_3 ///
			using "$table/table`nof'.tex", ///
			b(4) se(4) ar2(a3) ///
			nolegend nonotes star(* 0.10 ** 0.05 *** 0.01) replace label wrap substitute(\_ _) ///
			stats(fe t N r_squared f1s twosls, labels("Fixed Effects" "Estimation Period" "Observations" "\$R^{2}$" "First-Stage F-Stat" "Estimation Method") fmt(%50 %50 %9.0fc %9.2fc %9.2fc %50))
		
		local ab filefilter "$table/table`nof'.tex" "$table/tmp.tex"
		local ba filefilter "$table/tmp.tex" "$table/table`nof'.tex"
		
		`ab', from ("\BSbegin{tabular}") to ("\BSadjustbox{max width=\BStextwidth}{\n\BSbegin{tabular}") replace
		`ba', from ("\BSend{tabular}") to ("\BSend{tabular}\n}") replace
		`ab', replace ///
		from("&\BSmulticolumn{1}{c}{E-Commerce Intensity $\BStimes D_{t}$}&\BSmulticolumn{1}{c}{E-Commerce Intensity $\BStimes D_{t}$}&\BSmulticolumn{1}{c}{E-Commerce Intensity}\BS\BS") ///
		to("&\BSmulticolumn{1}{c}{E-Commerce}&\BSmulticolumn{1}{c}{E-Commerce}&\BSmulticolumn{1}{c}{E-Commerce}\BS\BS\n                    &\BSmulticolumn{1}{c}{Intensity $\BStimes D_{t}$}&\BSmulticolumn{1}{c}{Intensity $\BStimes D_{t}$}&\BSmulticolumn{1}{c}{Intensity}\BS\BS")
		`ba', replace
		rm "$table/tmp.tex"
	
	}
	
end

*===============================================================================
* Plot Figure A.1
*===============================================================================
cap program drop plotfigure_a1_prep
program define plotfigure_a1_prep
	* Private helper for plotfigure_a1.
	* Loads "$Data/pretrend_pi.dta", splits products into the bottom and top
	* quartile of the intensity variable passed as the single argument, averages
	* pi and epi by year within each quartile using cpi_weight, and cumulates them
	* into price levels normalised to 1 in 1997 (nP_bot nP_top neP_bot neP_top).
	* Leaves a tsset yearly dataset in memory with the synthetic base year removed.
	*
	* Argument:
	*   def   name of the intensity variable used to form the quartiles
	args def

	use "$Data/pretrend_pi.dta", clear

	* The panel is not balanced: restrict the window and report, per product,
	* how many non-missing pi observations there are (diagnostic output only;
	* no product is dropped on that basis).
	keep if year > 1990
	drop if year > 2020
	bys RPS: egen temp = count(pi)
	sum temp

	* Quartile cut-offs are taken over one observation per product
	sort RPS year
	bys RPS: gen first = 1 if _n == 1

	sum `def' if first == 1, d
	gen bot_quartile = 1 if `def' < `r(p25)'
	gen top_quartile = 1 if `def' > `r(p75)' & `def' ~= .

	gen qind = 1 if bot_quartile == 1
	replace qind = 2 if top_quartile == 1

	collapse (mean) pi epi [aw=cpi_weight], by(year qind)

	* The label name must match between define and values
	* (the original defined "qindcator" but attached "qindicator").
	label define qindicator 1 "Bottom Q" 2 "Top Q", modify
	label values qind qindicator
	drop if missing(qind)

	reshape wide pi epi, i(year) j(qind)

	foreach var in pi epi {
		rename `var'1 `var'_bot
		rename `var'2 `var'_top
		label var `var'_bot "`var' bottom quartile"
		label var `var'_top "`var' top quartile"
	}
	drop if missing(year)

	* Add one observation for the base year (first data year minus one),
	* where every price level is set to 1.
	set obs `=_N+1'
	qui sum year
	local b `r(min)'
	replace year = `b' - 1 if missing(year)
	tsset year

	foreach var in P eP {
		gen `var'_bot = 1 if year == `b' - 1
		gen `var'_top = 1 if year == `b' - 1
	}
	sort year

	* replace runs observation by observation, so [_n-1] already holds the
	* freshly computed previous level: this cumulates the index forward.
	* P uses log inflation (pi).
	replace P_bot = exp(pi_bot/100)*P_bot[_n-1] if missing(P_bot)
	replace P_top = exp(pi_top/100)*P_top[_n-1] if missing(P_top)

	* eP uses exact percentage inflation (epi) and must be chained on its own
	* lag. The original multiplied by the lag of P, which does not produce a
	* cumulated price level.
	* NOTE(review): this changes the plotted neP series relative to the
	* original code - confirm against the published figure.
	replace eP_bot = (1+epi_bot/100)*eP_bot[_n-1] if missing(eP_bot)
	replace eP_top = (1+epi_top/100)*eP_top[_n-1] if missing(eP_top)

	* Normalise every level to 1 in 1997
	foreach var in P_bot P_top eP_bot eP_top {
		gen temp`var' = `var' if year == 1997
		egen temp2`var' = total(temp`var')
		gen n`var' = `var'/temp2`var'
	}

	* Check: normalisation must leave log inflation unchanged
	foreach var in P_bot P_top nP_bot nP_top {
		gen temppi_`var' = ln(`var'/L.`var')*100
	}
	gen check = temppi_P_bot - temppi_nP_bot
	replace check = round(check,0.0001)
	qui sum check
	assert `r(mean)' == 0

	* Check: normalisation must leave exact inflation unchanged
	foreach var in eP_bot eP_top neP_bot neP_top {
		gen temppi_`var' = (`var'/L.`var'-1)*100
	}
	gen checkep = temppi_eP_top - temppi_neP_top
	replace checkep = round(checkep,0.0001)
	qui sum checkep
	assert `r(mean)' == 0
	drop temp* check*

	* Remove the synthetic base-year observation again
	qui sum year
	drop if year == `r(min)'

end

cap program drop plotfigure_a1
program define plotfigure_a1
	* Draws the two panels of Figure A.1 and exports them to
	* "$figure/figure_a1_left.pdf" and "$figure/figure_a1_right.pdf".
	* Left:  products grouped by x_NS2009, plus the nP20 series merged from
	*        "$Data/pretrend_category_pi.dta" on a second axis.
	* Right: products grouped by xcat_NS1999.
	* Takes no arguments; replaces the data in memory.

	*** Left panel: e-commerce intensity
	plotfigure_a1_prep x_NS2009

	* x-axis range = first and last year of the prepared series
	qui sum year
	local b `r(min)'
	local c `r(max)'

	merge 1:1 year using "$Data/pretrend_category_pi.dta", keep(3)

	label var nP20 "Electronics Price Level (1997 = 1)"
	twoway (scatter neP_bot year, lcolor(red) lwidth(thick) c(l) m(i)) (scatter neP_top year, lcolor(black) lwidth(thick) lp(dash) c(l) m(i)) ///
		(scatter nP20 year, lcolor(black) lwidth(thick) lp(dash_dot) c(l) m(i) yaxis(2)), ///
		ti("Products Grouped by E-Commerce Intensity") ///
		xtitle("Year", size(medium)) graphregion(color(white)) ylabel(, angle(0) format(%9.1f)) ylabel(, angle(0) axis(2) format(%9.1f)) ytitle("Price Level (1997 = 1)") ///
		legend(lab(1 "Bottom-quartile products (left axis)") lab(2 "Top-quartile products (left axis)") ///
		lab(3 "Electronics (right axis)") order(1 2 3) size(small) bmargin(none) keygap(0.4) symxsize(11)) ///
		xline(1997, lcolor(black)) xsc(r(`b' `c')) xlabel(1992 1997 2002 2007 2012 2017)
	graph export "$figure/figure_a1_left.pdf", replace

	*** Right panel: catalog sales intensity
	plotfigure_a1_prep xcat_NS1999

	qui sum year
	local b `r(min)'
	local c `r(max)'

	twoway (scatter neP_bot year, lcolor(red) lwidth(thick) c(l) m(i)) (scatter neP_top year, lcolor(black) lwidth(thick) lp(dash) c(l) m(i)), ///
		xtitle("Year", size(medium)) graphregion(color(white)) ylabel(, angle(0) format(%9.1f)) ytitle("Price Level (1997 = 1)") ///
		ti("Products Grouped by Catalog Sales Intensity") ///
		legend(lab(1 "Bottom-quartile products") lab(2 "Top-quartile products") order(1 2) size(small)) ///
		xline(1997, lcolor(black)) xsc(r(`b' `c')) xlabel(1992 1997 2002 2007 2012 2017)
	graph export "$figure/figure_a1_right.pdf", replace

end



*===============================================================================
* Consumer gain due to increased variety
cap program drop variety_gain
program define variety_gain
	* Computes the log price change attributed to new varieties (dlnP) for
	* sigma = 3,...,7:
	*   1. at the national level from "$Data/NSFIE_totalhh_national.dta"
	*      (results are listed to the log);
	*   2. at the prefecture level for 2014 from "$Data/NSFIE_2014.dta".
	* The prefecture results for sigma = 4 are then related to prefecture
	* characteristics: a scatter with fitted line is exported to
	* "$figure/figure5.pdf" and six regressions to "$table/table9.tex".
	* Takes no arguments; replaces the data in memory and clears stored estimates.

	*** National Level Results***
	use "$Data/NSFIE_totalhh_national.dta", clear

	* chi_t is calculated as total expenditure in NSFIE survey minus the sum of expenditure for all purchase type, ie what's not detailed in the survey
	gen chi_1999 = 0.563 /*for 2p household*/
	gen chi_2014 = 0.62 /*for total household*/

	* Internet expenditure share from METI
	gen s_meti_2014 = 0.0437

	* Share of internet and catalog expenditure in 1999 and 2014
	foreach year in 1999 2014 {
		egen tmp1 = total(avg_exp) if year == `year' & type_purchase == "Internet"
		egen tmp2 = total(avg_exp) if year == `year' & type_purchase == "Catalog"
		egen tmp3 = total(avg_exp) if year == `year' & type_purchase == "All"
		* spread each conditional total over every observation
		egen share_internet_`year' = max(tmp1)
		egen share_catalog_`year' = max(tmp2)
		egen all_exp_`year' = max(tmp3)
		replace share_internet_`year' = share_internet_`year'/all_exp_`year'
		replace share_catalog_`year' = share_catalog_`year'/all_exp_`year'
		drop tmp*
	}
	replace share_internet_1999 = 0 /*NS1999 does not list internet expenditure separately*/

	keep chi_1999 chi_2014 s_meti_2014 share_internet_1999 share_catalog_1999 share_internet_2014 share_catalog_2014
	duplicates drop

	* Calculate lambda and welfare changes
	* note: We don't have 2017 data from NSFIE, therefore we assume the 2014 value
	gen lambda_e_14 = (1-(s_meti_2014))*chi_2014+1-chi_2014

	gen lambda_t_99 = (1-(share_catalog_1999))*chi_1999+1-chi_1999
	gen lambda_t_14 = (1-(s_meti_2014+share_catalog_2014))*chi_2014+1-chi_2014

	forvalues sigma = 3/7 {
		gen dlnP_e_14`sigma' = 1/(`sigma'-1)*log(lambda_e_14)
		gen dlnP_t_14`sigma' = 1/(`sigma'-1)*log(lambda_t_14/lambda_t_99)
	}

	keep dlnP_*
	gen order = _n
	reshape long dlnP_e_14 dlnP_t_14, i(order) j(sigma)

	* The national results are not saved anywhere and the next "use" replaces
	* them, so print them to the log before moving on.
	list sigma dlnP_e_14 dlnP_t_14, noobs


	*** Prefecture Level Results in 2014***
	use "$Data/NSFIE_2014.dta", clear

	keep if purchase_location == "All"

	* chi_t is calculated as total expenditure in NSFIE survey minus the sum of expenditure for all purchase type, ie what's not detailed in the survey
	gen chi_1999 = 0.563 /*for 2p household*/
	gen chi_2014 = 0.62 /*for total household*/

	* Internet expenditure share from METI
	gen s_meti_2014 = 0.0437

	* Share of internet and catalog expenditure at prefecture level
	egen tmp1 = total(avg_exp) if type_purchase == "(7) Mail-order selling （Internet）", by(region)
	egen tmp2 = total(avg_exp) if type_purchase == "(8) Mail-order selling （Others）", by(region)
	egen tmp3 = total(avg_exp) if type_purchase == "All", by(region)
	egen share_internet_p = max(tmp1), by(region)
	egen share_catalog_p = max(tmp2), by(region)
	egen all_exp_p = max(tmp3), by(region)
	replace share_internet_p = share_internet_p/all_exp_p
	replace share_catalog_p = share_catalog_p/all_exp_p
	drop tmp*

	* Scale share internet by METI share over national share
	egen tmp1 = total(avg_exp) if type_purchase == "(7) Mail-order selling （Internet）" & region == "Japan"
	egen tmp3 = total(avg_exp) if type_purchase == "All" & region == "Japan"
	egen share_internet_n = max(tmp1)
	egen all_exp_n = max(tmp3)
	replace share_internet_n = share_internet_n/all_exp_n
	drop tmp*

	gen scaling_factor = s_meti_2014/share_internet_n
	replace share_internet_p = share_internet_p*scaling_factor

	keep region region_name chi_1999 chi_2014 s_meti_2014 share_internet_p share_catalog_p share_internet_n scaling_factor
	duplicates drop

	* Get catalog share in 1999.
	* A tempfile is used for the hand-over so nothing is written to (or can
	* clobber a tmp.dta in) the working directory.
	tempfile catalog1999
	preserve
		use "$Data/NSFIE_1999.dta", clear

		keep if purchase_location == "All"

		* share of catalog expenditure at prefecture level
		egen tmp2 = total(avg_exp) if type_purchase == "(7) Mail-order selling", by(region)
		egen tmp3 = total(avg_exp) if type_purchase == "All", by(region)
		egen share_catalog_p_1999 = max(tmp2), by(region)
		egen all_exp_p = max(tmp3), by(region)
		replace share_catalog_p_1999 = share_catalog_p_1999/all_exp_p
		drop tmp*

		* full variable name: the original relied on variable abbreviation
		keep region share_catalog_p_1999
		duplicates drop
		save "`catalog1999'", replace
	restore

	merge 1:1 region using "`catalog1999'", keep(3) nogen

	* Calculate lambda and welfare changes
	gen lambda_e_14 = (1-(share_internet_p))*chi_2014+1-chi_2014

	forvalues sigma = 3/7 {
		gen dlnP_e`sigma' = 1/(`sigma'-1)*log(lambda_e_14)
	}

	* keep the prefecture data
	replace region = trim(region)
	keep if strpos(region,"-ken")|strpos(region,"-to")|strpos(region,"-fu")|region=="Hokkaido"


	* generate region label
	rename region prefecture
	gen order = _n
	gen label_region = "{    "+trim(prefecture)+"}"
	qui sum order
	forvalues i = 1/`r(N)'{
		label define prefecture_id `=order[`i']' "`=label_region[`i']'", modify
	}
	label values order prefecture_id

	* merge with city labels (again via a tempfile)
	tempfile citylabels
	preserve
		import excel using "$Data/cityid.xls", sheet("prefecture") first clear
		save "`citylabels'", replace
	restore
	merge 1:1 prefecture using "`citylabels'", assert(3) nogen

	* merge in prefecture characteristics
	*population, education level, income per capita, and average age
	merge 1:1 prefecture using "$Data/pf_controls.dta", assert(3) nogen

	* calculate share of college educated
	gen share_edu_higher = higher/population_over_15

	* prefecture name without its "-ken", "-to", "-fu" style suffix
	gen prefecture_label = substr(prefecture,1,strpos(prefecture,"-")-1) if strpos(prefecture,"-")>0
	replace prefecture_label=prefecture if strpos(prefecture,"-")==0

	* Bivariate fit shown in the figure note
	qui reg dlnP_e4 share_edu_higher
	local slope : di %9.3f `=_b[share_edu_higher]'
	local se : di %9.3f `=_se[share_edu_higher]'
	local rsquared : di %9.3f `e(r2)'
	predict fit_1, xb


	* The legend is switched off; the original also passed a legend() label
	* spec that legend(off) overrode, so it is dropped here.
	twoway (scatter fit_1 share_edu_higher, c(l) msymbol(i) lcolor(black) lwidth(thick) sort(share_edu_higher)) ///
		(scatter dlnP_e4 share_edu_higher, mc(black) mlabc(black) mlabel(city_marker_label)  mlabsize(vsmall) mlabposition(12)) ///
	, ///
	ytitle("Log Price Change Due to New Varieties 1996-2014", size(small)) ylabel(,angle(0) format(%9.3f)) ///
	xtitle("Share of College Education", size(small)) xlabel(, format(%9.2f)) ///
	graphregion(color(white)) ///
	note("Slope:`slope'; SE:`se'; R{superscript:2}:`rsquared'.", size(med)) legend(off)
	graph export "$figure/figure5.pdf", replace


	* Regress on different controls *
	gen share_secondary = secondary/population_over_15

	gen ln_income_per_person = ln(income_per_person)
	gen ln_population = ln(population)
	gen ln_avg_age =ln(avg_age)

	eststo clear

	* Each model is stored once, directly under the name esttab uses below
	eststo welfare_control_1: reg dlnP_e4 share_edu_higher

	eststo welfare_control_2: reg dlnP_e4 share_edu_higher ln_population

	eststo welfare_control_3: reg dlnP_e4 share_edu_higher ln_income_per_person

	eststo welfare_control_4: reg dlnP_e4 share_edu_higher ln_avg_age

	eststo welfare_control_5: reg dlnP_e4 share_edu_higher share_secondary

	eststo welfare_control_6: reg dlnP_e4 share_edu_higher ln_population ln_income_per_person ln_avg_age share_secondary

	label var dlnP_e4 "\$\Delta lnP_{14}^{E}$"
	label var share_edu_higher "Share of College Educated"
	label var ln_population "ln(Population)"
	label var ln_income_per_person "ln(Income per Capita)"
	label var ln_avg_age "ln(Average Age)"
	label var share_secondary "Share of Secondary Educated"

	esttab welfare_control_1 welfare_control_2 welfare_control_3 welfare_control_4 welfare_control_5  welfare_control_6 ///
			using "$table/table9.tex", ///
			b(3) se(3) r2(a3) ///
			nolegend nonotes star(* 0.10 ** 0.05 *** 0.01) replace label wrap substitute(\_ _) mlabels(none)

	* Insert a "Dependent Variable" header row above the column numbers.
	* filefilter cannot edit in place, so write to a scratch file and copy back.
	filefilter "$table/table9.tex" "$table/tmp.tex", replace ///
		from(" &\BSmulticolumn{1}{c}{(1)}") to("& \BSmulticolumn{6}{c}{Dependent Variable: \$\BSDelta ln(P_{14}^{E})$}\BS\BS\n &\BSmulticolumn{1}{c}{(1)}")
	* plain copy instead of a filefilter call with empty from()/to() patterns
	copy "$table/tmp.tex" "$table/table9.tex", replace

	erase "$table/tmp.tex"

end
*==============================================================================
